apache_log_report 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module ApacheLogReport
2
- VERSION = "0.9.0"
2
+ VERSION = "0.9.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apache_log_report
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adolfo Villafiorita
@@ -10,6 +10,20 @@ bindir: exe
10
10
  cert_chain: []
11
11
  date: 2020-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: apache_log-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: browser
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -38,6 +52,20 @@ dependencies:
38
52
  - - ">="
39
53
  - !ruby/object:Gem::Version
40
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: terminal-table
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
41
69
  description: Generate a request report in OrgMode format from an Apache log file.
42
70
  email:
43
71
  - adolfo.villafiorita@ict4g.net
@@ -57,9 +85,6 @@ files:
57
85
  - bin/setup
58
86
  - exe/apache_log_report
59
87
  - lib/apache_log_report.rb
60
- - lib/apache_log_report/log_parser_hash.rb
61
- - lib/apache_log_report/log_parser_sqlite3.rb
62
- - lib/apache_log_report/option_parser.rb
63
88
  - lib/apache_log_report/version.rb
64
89
  homepage: https://www.ict4g.net/gitea/adolfo/apache_log_report
65
90
  licenses:
@@ -1,49 +0,0 @@
1
- require 'date'
2
- require 'browser'
3
-
4
- class LogParserHash
5
- # make a matchdata into a Hash.
6
- # pure magic gotten from: http://zetcode.com/db/sqliteruby/connect/
7
- # Used during parsing to simplify the generation of the hash.
8
- class MatchData
9
- def to_h
10
- names.map(&:intern).zip(captures).to_h
11
- end
12
- end
13
-
14
- def parse filename, options = {}
15
- progressbar = ProgressBar.create(output: $stderr)
16
-
17
- content = filename ? File.readlines(filename) : ARGF.readlines
18
- progressbar.total = content.size
19
-
20
- # We parse combined log, which looks like:
21
- #
22
- # 66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] "GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"
23
- # 178.172.20.114 - - [25/Aug/2020:17:13:21 +0200] "GET /favicon.ico HTTP/1.1" 404 196 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
24
- # we do not parse entries such as:
25
- combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<timestamp>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
26
-
27
- content.collect { |line|
28
- hashie = combined_regexp.match line
29
- hash = hashie.to_h
30
-
31
- progressbar.increment
32
-
33
- if hash != {}
34
- hash[:date_time] = DateTime.parse(hash[:timestamp].sub(":", " "))
35
- hash[:size] = hash[:size].to_i
36
- hash[:type] = hash[:uri] ? File.extname(hash[:uri]) : ""
37
-
38
- ua = Browser.new(hash[:user_agent_string], accept_language: "en-us")
39
- hash[:bot] = ua.bot?
40
- hash[:browser] = ua.name || ""
41
- hash[:browser_version] = ua.version || ""
42
- hash[:platform] = ua.platform.name || ""
43
- hash[:platform_version] = ua.platform.version || ""
44
-
45
- hash
46
- end
47
- }.compact
48
- end
49
- end
@@ -1,99 +0,0 @@
1
- #
2
- # SQLITE3
3
- #
4
- require 'sqlite3'
5
-
6
- class LogParser
7
- def self.parse filename, options = {}
8
-
9
- progressbar = ProgressBar.create(output: $stderr)
10
-
11
- content = filename ? File.readlines(filename) : ARGF.readlines
12
- progressbar.total = content.size
13
-
14
- db = SQLite3::Database.new ":memory:"
15
- db.execute "CREATE TABLE IF NOT EXISTS LogLine(
16
- id INTEGER PRIMARY KEY AUTOINCREMENT,
17
- date_time TEXT,
18
- ip TEXT,
19
- remote_log_name TEXT,
20
- method TEXT,
21
- uri TEXT,
22
- status TEXT,
23
- size INTEGER,
24
- referer TEXT,
25
- user_agent_string TEXT,
26
- bot INTEGER,
27
- browser TEXT,
28
- browser_version TEXT,
29
- platform TEXT,
30
- platform_version TEXT
31
- )"
32
-
33
- ins = db.prepare('insert into LogLine (
34
- date_time,
35
- ip,
36
- remote_log_name,
37
- method,
38
- uri,
39
- status,
40
- size,
41
- referer,
42
- user_agent_string,
43
- bot,
44
- browser,
45
- browser_version,
46
- platform,
47
- platform_version)
48
- values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
49
-
50
- combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<datetime>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
51
-
52
- content.collect { |line|
53
- hashie = combined_regexp.match line
54
-
55
- progressbar.increment
56
-
57
- puts hashie
58
- if hashie != {}
59
- ua = Browser.new(hashie[:user_agent_string], accept_language: "en-us")
60
- puts <<EOS
61
- #{hashie[:datetime].sub(":", " ")},
62
- #{hashie[:ip]},
63
- #{hashie[:remote_log_name]},
64
- #{hashie[:method]},
65
- #{hashie[:uri]},
66
- #{ hashie[:status]},
67
- #{ hashie[:size].to_i},
68
- #{ hashie[:referer]},
69
- #{ hashie[:user_agent_string]},
70
- #{ ua.bot? ? 1 : 0},
71
- #{ ua.name || ""},
72
- #{ ua.version || ""},
73
- #{ ua.platform.name || ""},
74
- #{ ua.platform.version || ""}
75
- EOS
76
-
77
- ins.execute(
78
- hashie[:datetime].sub(":", " "),
79
- hashie[:ip],
80
- hashie[:remote_log_name],
81
- hashie[:method],
82
- hashie[:uri],
83
- hashie[:status],
84
- hashie[:size].to_i,
85
- hashie[:referer],
86
- hashie[:user_agent_string],
87
- ua.bot? ? 1 : 0,
88
- (ua.name || ""),
89
- (ua.version || ""),
90
- (ua.platform.name || ""),
91
- (ua.platform.version || "")
92
- )
93
- end
94
- }
95
-
96
- db
97
- end
98
- end
99
-
@@ -1,63 +0,0 @@
1
- require 'optparse'
2
- require 'optparse/date'
3
-
4
- class OptionParser
5
- def self.parse(options)
6
- args = {}
7
-
8
- opt_parser = OptionParser.new do |opts|
9
- opts.banner = "Usage: log-analyzer.rb [options] logfile"
10
-
11
- opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{LIMIT})") do |n|
12
- args[:limit] = n
13
- end
14
-
15
- opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
16
- args[:from_date] = n
17
- end
18
-
19
- opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
20
- args[:to_date] = n
21
- end
22
-
23
- opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do |n|
24
- args[:ignore_crawlers] = true
25
- end
26
-
27
- opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do |n|
28
- args[:only_crawlers] = true
29
- end
30
-
31
- opts.on("-t", "--distinguish-crawlers", "Print totals distinguishing crawlers from visitors") do |n|
32
- args[:distinguish_crawlers] = true
33
- end
34
-
35
- opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do |n|
36
- args[:no_selfpoll] = true
37
- end
38
-
39
- opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
40
- args[:prefix] = n
41
- end
42
-
43
- opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
44
- args[:suffix] = n
45
- end
46
-
47
- opts.on("-h", "--help", "Prints this help") do
48
- puts opts
49
- exit
50
- end
51
- end
52
-
53
- opt_parser.parse!(options)
54
-
55
- args[:limit] ||= LIMIT
56
- args[:ignore_crawlers] ||= false
57
- args[:only_crawlers] ||= false
58
- args[:distinguish_crawlers] ||= false
59
- args[:no_selfpoll] ||= false
60
-
61
- return args
62
- end
63
- end