apache_log_report 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/apache_log_report.gemspec +2 -0
- data/exe/apache_log_report +16 -611
- data/lib/apache_log_report.rb +504 -3
- data/lib/apache_log_report/version.rb +1 -1
- metadata +29 -4
- data/lib/apache_log_report/log_parser_hash.rb +0 -49
- data/lib/apache_log_report/log_parser_sqlite3.rb +0 -99
- data/lib/apache_log_report/option_parser.rb +0 -63
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apache_log_report
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adolfo Villafiorita
|
@@ -10,6 +10,20 @@ bindir: exe
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: apache_log-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: browser
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +52,20 @@ dependencies:
|
|
38
52
|
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: terminal-table
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
41
69
|
description: Generate a request report in OrgMode format from an Apache log file.
|
42
70
|
email:
|
43
71
|
- adolfo.villafiorita@ict4g.net
|
@@ -57,9 +85,6 @@ files:
|
|
57
85
|
- bin/setup
|
58
86
|
- exe/apache_log_report
|
59
87
|
- lib/apache_log_report.rb
|
60
|
-
- lib/apache_log_report/log_parser_hash.rb
|
61
|
-
- lib/apache_log_report/log_parser_sqlite3.rb
|
62
|
-
- lib/apache_log_report/option_parser.rb
|
63
88
|
- lib/apache_log_report/version.rb
|
64
89
|
homepage: https://www.ict4g.net/gitea/adolfo/apache_log_report
|
65
90
|
licenses:
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'date'
|
2
|
-
require 'browser'
|
3
|
-
|
4
|
-
class LogParserHash
|
5
|
-
# make a matchdata into a Hash.
|
6
|
-
# pure magic gotten from: http://zetcode.com/db/sqliteruby/connect/
|
7
|
-
# Used during parsing to simplify the generation of the hash.
|
8
|
-
class MatchData
|
9
|
-
def to_h
|
10
|
-
names.map(&:intern).zip(captures).to_h
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
def parse filename, options = {}
|
15
|
-
progressbar = ProgressBar.create(output: $stderr)
|
16
|
-
|
17
|
-
content = filename ? File.readlines(filename) : ARGF.readlines
|
18
|
-
progressbar.total = content.size
|
19
|
-
|
20
|
-
# We parse combined log, which looks like:
|
21
|
-
#
|
22
|
-
# 66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] "GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"
|
23
|
-
# 178.172.20.114 - - [25/Aug/2020:17:13:21 +0200] "GET /favicon.ico HTTP/1.1" 404 196 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
|
24
|
-
# we do not parse entries such as:
|
25
|
-
combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<timestamp>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
|
26
|
-
|
27
|
-
content.collect { |line|
|
28
|
-
hashie = combined_regexp.match line
|
29
|
-
hash = hashie.to_h
|
30
|
-
|
31
|
-
progressbar.increment
|
32
|
-
|
33
|
-
if hash != {}
|
34
|
-
hash[:date_time] = DateTime.parse(hash[:timestamp].sub(":", " "))
|
35
|
-
hash[:size] = hash[:size].to_i
|
36
|
-
hash[:type] = hash[:uri] ? File.extname(hash[:uri]) : ""
|
37
|
-
|
38
|
-
ua = Browser.new(hash[:user_agent_string], accept_language: "en-us")
|
39
|
-
hash[:bot] = ua.bot?
|
40
|
-
hash[:browser] = ua.name || ""
|
41
|
-
hash[:browser_version] = ua.version || ""
|
42
|
-
hash[:platform] = ua.platform.name || ""
|
43
|
-
hash[:platform_version] = ua.platform.version || ""
|
44
|
-
|
45
|
-
hash
|
46
|
-
end
|
47
|
-
}.compact
|
48
|
-
end
|
49
|
-
end
|
@@ -1,99 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# SQLITE3
|
3
|
-
#
|
4
|
-
require 'sqlite3'
|
5
|
-
|
6
|
-
class LogParser
|
7
|
-
def self.parse filename, options = {}
|
8
|
-
|
9
|
-
progressbar = ProgressBar.create(output: $stderr)
|
10
|
-
|
11
|
-
content = filename ? File.readlines(filename) : ARGF.readlines
|
12
|
-
progressbar.total = content.size
|
13
|
-
|
14
|
-
db = SQLite3::Database.new ":memory:"
|
15
|
-
db.execute "CREATE TABLE IF NOT EXISTS LogLine(
|
16
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
17
|
-
date_time TEXT,
|
18
|
-
ip TEXT,
|
19
|
-
remote_log_name TEXT,
|
20
|
-
method TEXT,
|
21
|
-
uri TEXT,
|
22
|
-
status TEXT,
|
23
|
-
size INTEGER,
|
24
|
-
referer TEXT,
|
25
|
-
user_agent_string TEXT,
|
26
|
-
bot INTEGER,
|
27
|
-
browser TEXT,
|
28
|
-
browser_version TEXT,
|
29
|
-
platform TEXT,
|
30
|
-
platform_version TEXT
|
31
|
-
)"
|
32
|
-
|
33
|
-
ins = db.prepare('insert into LogLine (
|
34
|
-
date_time,
|
35
|
-
ip,
|
36
|
-
remote_log_name,
|
37
|
-
method,
|
38
|
-
uri,
|
39
|
-
status,
|
40
|
-
size,
|
41
|
-
referer,
|
42
|
-
user_agent_string,
|
43
|
-
bot,
|
44
|
-
browser,
|
45
|
-
browser_version,
|
46
|
-
platform,
|
47
|
-
platform_version)
|
48
|
-
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
|
49
|
-
|
50
|
-
combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<datetime>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
|
51
|
-
|
52
|
-
content.collect { |line|
|
53
|
-
hashie = combined_regexp.match line
|
54
|
-
|
55
|
-
progressbar.increment
|
56
|
-
|
57
|
-
puts hashie
|
58
|
-
if hashie != {}
|
59
|
-
ua = Browser.new(hashie[:user_agent_string], accept_language: "en-us")
|
60
|
-
puts <<EOS
|
61
|
-
#{hashie[:datetime].sub(":", " ")},
|
62
|
-
#{hashie[:ip]},
|
63
|
-
#{hashie[:remote_log_name]},
|
64
|
-
#{hashie[:method]},
|
65
|
-
#{hashie[:uri]},
|
66
|
-
#{ hashie[:status]},
|
67
|
-
#{ hashie[:size].to_i},
|
68
|
-
#{ hashie[:referer]},
|
69
|
-
#{ hashie[:user_agent_string]},
|
70
|
-
#{ ua.bot? ? 1 : 0},
|
71
|
-
#{ ua.name || ""},
|
72
|
-
#{ ua.version || ""},
|
73
|
-
#{ ua.platform.name || ""},
|
74
|
-
#{ ua.platform.version || ""}
|
75
|
-
EOS
|
76
|
-
|
77
|
-
ins.execute(
|
78
|
-
hashie[:datetime].sub(":", " "),
|
79
|
-
hashie[:ip],
|
80
|
-
hashie[:remote_log_name],
|
81
|
-
hashie[:method],
|
82
|
-
hashie[:uri],
|
83
|
-
hashie[:status],
|
84
|
-
hashie[:size].to_i,
|
85
|
-
hashie[:referer],
|
86
|
-
hashie[:user_agent_string],
|
87
|
-
ua.bot? ? 1 : 0,
|
88
|
-
(ua.name || ""),
|
89
|
-
(ua.version || ""),
|
90
|
-
(ua.platform.name || ""),
|
91
|
-
(ua.platform.version || "")
|
92
|
-
)
|
93
|
-
end
|
94
|
-
}
|
95
|
-
|
96
|
-
db
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
@@ -1,63 +0,0 @@
|
|
1
|
-
require 'optparse'
|
2
|
-
require 'optparse/date'
|
3
|
-
|
4
|
-
class OptionParser
|
5
|
-
def self.parse(options)
|
6
|
-
args = {}
|
7
|
-
|
8
|
-
opt_parser = OptionParser.new do |opts|
|
9
|
-
opts.banner = "Usage: log-analyzer.rb [options] logfile"
|
10
|
-
|
11
|
-
opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{LIMIT})") do |n|
|
12
|
-
args[:limit] = n
|
13
|
-
end
|
14
|
-
|
15
|
-
opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
|
16
|
-
args[:from_date] = n
|
17
|
-
end
|
18
|
-
|
19
|
-
opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
|
20
|
-
args[:to_date] = n
|
21
|
-
end
|
22
|
-
|
23
|
-
opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do |n|
|
24
|
-
args[:ignore_crawlers] = true
|
25
|
-
end
|
26
|
-
|
27
|
-
opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do |n|
|
28
|
-
args[:only_crawlers] = true
|
29
|
-
end
|
30
|
-
|
31
|
-
opts.on("-t", "--distinguish-crawlers", "Print totals distinguishing crawlers from visitors") do |n|
|
32
|
-
args[:distinguish_crawlers] = true
|
33
|
-
end
|
34
|
-
|
35
|
-
opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do |n|
|
36
|
-
args[:no_selfpoll] = true
|
37
|
-
end
|
38
|
-
|
39
|
-
opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
|
40
|
-
args[:prefix] = n
|
41
|
-
end
|
42
|
-
|
43
|
-
opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
|
44
|
-
args[:suffix] = n
|
45
|
-
end
|
46
|
-
|
47
|
-
opts.on("-h", "--help", "Prints this help") do
|
48
|
-
puts opts
|
49
|
-
exit
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
opt_parser.parse!(options)
|
54
|
-
|
55
|
-
args[:limit] ||= LIMIT
|
56
|
-
args[:ignore_crawlers] ||= false
|
57
|
-
args[:only_crawlers] ||= false
|
58
|
-
args[:distinguish_crawlers] ||= false
|
59
|
-
args[:no_selfpoll] ||= false
|
60
|
-
|
61
|
-
return args
|
62
|
-
end
|
63
|
-
end
|