apache_log_report 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/apache_log_report.gemspec +2 -0
- data/exe/apache_log_report +16 -611
- data/lib/apache_log_report.rb +504 -3
- data/lib/apache_log_report/version.rb +1 -1
- metadata +29 -4
- data/lib/apache_log_report/log_parser_hash.rb +0 -49
- data/lib/apache_log_report/log_parser_sqlite3.rb +0 -99
- data/lib/apache_log_report/option_parser.rb +0 -63
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apache_log_report
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adolfo Villafiorita
|
@@ -10,6 +10,20 @@ bindir: exe
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: apache_log-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: browser
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +52,20 @@ dependencies:
|
|
38
52
|
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: terminal-table
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
41
69
|
description: Generate a request report in OrgMode format from an Apache log file.
|
42
70
|
email:
|
43
71
|
- adolfo.villafiorita@ict4g.net
|
@@ -57,9 +85,6 @@ files:
|
|
57
85
|
- bin/setup
|
58
86
|
- exe/apache_log_report
|
59
87
|
- lib/apache_log_report.rb
|
60
|
-
- lib/apache_log_report/log_parser_hash.rb
|
61
|
-
- lib/apache_log_report/log_parser_sqlite3.rb
|
62
|
-
- lib/apache_log_report/option_parser.rb
|
63
88
|
- lib/apache_log_report/version.rb
|
64
89
|
homepage: https://www.ict4g.net/gitea/adolfo/apache_log_report
|
65
90
|
licenses:
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
require 'browser'
|
3
|
-
|
4
|
-
class LogParserHash
|
5
|
-
# make a matchdata into a Hash.
|
6
|
-
# pure magic gotten from: http://zetcode.com/db/sqliteruby/connect/
|
7
|
-
# Used during parsing to simplify the generation of the hash.
|
8
|
-
class MatchData
|
9
|
-
def to_h
|
10
|
-
names.map(&:intern).zip(captures).to_h
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
def parse filename, options = {}
|
15
|
-
progressbar = ProgressBar.create(output: $stderr)
|
16
|
-
|
17
|
-
content = filename ? File.readlines(filename) : ARGF.readlines
|
18
|
-
progressbar.total = content.size
|
19
|
-
|
20
|
-
# We parse combined log, which looks like:
|
21
|
-
#
|
22
|
-
# 66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] "GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"
|
23
|
-
# 178.172.20.114 - - [25/Aug/2020:17:13:21 +0200] "GET /favicon.ico HTTP/1.1" 404 196 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
|
24
|
-
# we do not parse entries such as:
|
25
|
-
combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<timestamp>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
|
26
|
-
|
27
|
-
content.collect { |line|
|
28
|
-
hashie = combined_regexp.match line
|
29
|
-
hash = hashie.to_h
|
30
|
-
|
31
|
-
progressbar.increment
|
32
|
-
|
33
|
-
if hash != {}
|
34
|
-
hash[:date_time] = DateTime.parse(hash[:timestamp].sub(":", " "))
|
35
|
-
hash[:size] = hash[:size].to_i
|
36
|
-
hash[:type] = hash[:uri] ? File.extname(hash[:uri]) : ""
|
37
|
-
|
38
|
-
ua = Browser.new(hash[:user_agent_string], accept_language: "en-us")
|
39
|
-
hash[:bot] = ua.bot?
|
40
|
-
hash[:browser] = ua.name || ""
|
41
|
-
hash[:browser_version] = ua.version || ""
|
42
|
-
hash[:platform] = ua.platform.name || ""
|
43
|
-
hash[:platform_version] = ua.platform.version || ""
|
44
|
-
|
45
|
-
hash
|
46
|
-
end
|
47
|
-
}.compact
|
48
|
-
end
|
49
|
-
end
|
@@ -1,99 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# SQLITE3
|
3
|
-
#
|
4
|
-
require 'sqlite3'
|
5
|
-
|
6
|
-
class LogParser
|
7
|
-
def self.parse filename, options = {}
|
8
|
-
|
9
|
-
progressbar = ProgressBar.create(output: $stderr)
|
10
|
-
|
11
|
-
content = filename ? File.readlines(filename) : ARGF.readlines
|
12
|
-
progressbar.total = content.size
|
13
|
-
|
14
|
-
db = SQLite3::Database.new ":memory:"
|
15
|
-
db.execute "CREATE TABLE IF NOT EXISTS LogLine(
|
16
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
17
|
-
date_time TEXT,
|
18
|
-
ip TEXT,
|
19
|
-
remote_log_name TEXT,
|
20
|
-
method TEXT,
|
21
|
-
uri TEXT,
|
22
|
-
status TEXT,
|
23
|
-
size INTEGER,
|
24
|
-
referer TEXT,
|
25
|
-
user_agent_string TEXT,
|
26
|
-
bot INTEGER,
|
27
|
-
browser TEXT,
|
28
|
-
browser_version TEXT,
|
29
|
-
platform TEXT,
|
30
|
-
platform_version TEXT
|
31
|
-
)"
|
32
|
-
|
33
|
-
ins = db.prepare('insert into LogLine (
|
34
|
-
date_time,
|
35
|
-
ip,
|
36
|
-
remote_log_name,
|
37
|
-
method,
|
38
|
-
uri,
|
39
|
-
status,
|
40
|
-
size,
|
41
|
-
referer,
|
42
|
-
user_agent_string,
|
43
|
-
bot,
|
44
|
-
browser,
|
45
|
-
browser_version,
|
46
|
-
platform,
|
47
|
-
platform_version)
|
48
|
-
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
|
49
|
-
|
50
|
-
combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<datetime>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
|
51
|
-
|
52
|
-
content.collect { |line|
|
53
|
-
hashie = combined_regexp.match line
|
54
|
-
|
55
|
-
progressbar.increment
|
56
|
-
|
57
|
-
puts hashie
|
58
|
-
if hashie != {}
|
59
|
-
ua = Browser.new(hashie[:user_agent_string], accept_language: "en-us")
|
60
|
-
puts <<EOS
|
61
|
-
#{hashie[:datetime].sub(":", " ")},
|
62
|
-
#{hashie[:ip]},
|
63
|
-
#{hashie[:remote_log_name]},
|
64
|
-
#{hashie[:method]},
|
65
|
-
#{hashie[:uri]},
|
66
|
-
#{ hashie[:status]},
|
67
|
-
#{ hashie[:size].to_i},
|
68
|
-
#{ hashie[:referer]},
|
69
|
-
#{ hashie[:user_agent_string]},
|
70
|
-
#{ ua.bot? ? 1 : 0},
|
71
|
-
#{ ua.name || ""},
|
72
|
-
#{ ua.version || ""},
|
73
|
-
#{ ua.platform.name || ""},
|
74
|
-
#{ ua.platform.version || ""}
|
75
|
-
EOS
|
76
|
-
|
77
|
-
ins.execute(
|
78
|
-
hashie[:datetime].sub(":", " "),
|
79
|
-
hashie[:ip],
|
80
|
-
hashie[:remote_log_name],
|
81
|
-
hashie[:method],
|
82
|
-
hashie[:uri],
|
83
|
-
hashie[:status],
|
84
|
-
hashie[:size].to_i,
|
85
|
-
hashie[:referer],
|
86
|
-
hashie[:user_agent_string],
|
87
|
-
ua.bot? ? 1 : 0,
|
88
|
-
(ua.name || ""),
|
89
|
-
(ua.version || ""),
|
90
|
-
(ua.platform.name || ""),
|
91
|
-
(ua.platform.version || "")
|
92
|
-
)
|
93
|
-
end
|
94
|
-
}
|
95
|
-
|
96
|
-
db
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
@@ -1,63 +0,0 @@
|
|
1
|
-
require 'optparse'
|
2
|
-
require 'optparse/date'
|
3
|
-
|
4
|
-
class OptionParser
|
5
|
-
def self.parse(options)
|
6
|
-
args = {}
|
7
|
-
|
8
|
-
opt_parser = OptionParser.new do |opts|
|
9
|
-
opts.banner = "Usage: log-analyzer.rb [options] logfile"
|
10
|
-
|
11
|
-
opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{LIMIT})") do |n|
|
12
|
-
args[:limit] = n
|
13
|
-
end
|
14
|
-
|
15
|
-
opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
|
16
|
-
args[:from_date] = n
|
17
|
-
end
|
18
|
-
|
19
|
-
opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
|
20
|
-
args[:to_date] = n
|
21
|
-
end
|
22
|
-
|
23
|
-
opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do |n|
|
24
|
-
args[:ignore_crawlers] = true
|
25
|
-
end
|
26
|
-
|
27
|
-
opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do |n|
|
28
|
-
args[:only_crawlers] = true
|
29
|
-
end
|
30
|
-
|
31
|
-
opts.on("-t", "--distinguish-crawlers", "Print totals distinguishing crawlers from visitors") do |n|
|
32
|
-
args[:distinguish_crawlers] = true
|
33
|
-
end
|
34
|
-
|
35
|
-
opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do |n|
|
36
|
-
args[:no_selfpoll] = true
|
37
|
-
end
|
38
|
-
|
39
|
-
opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
|
40
|
-
args[:prefix] = n
|
41
|
-
end
|
42
|
-
|
43
|
-
opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
|
44
|
-
args[:suffix] = n
|
45
|
-
end
|
46
|
-
|
47
|
-
opts.on("-h", "--help", "Prints this help") do
|
48
|
-
puts opts
|
49
|
-
exit
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
opt_parser.parse!(options)
|
54
|
-
|
55
|
-
args[:limit] ||= LIMIT
|
56
|
-
args[:ignore_crawlers] ||= false
|
57
|
-
args[:only_crawlers] ||= false
|
58
|
-
args[:distinguish_crawlers] ||= false
|
59
|
-
args[:no_selfpoll] ||= false
|
60
|
-
|
61
|
-
return args
|
62
|
-
end
|
63
|
-
end
|