apache_log_report 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -6
- data/exe/apache_log_report +16 -4
- data/lib/apache_log_report/apache_log_report.rb +245 -0
- data/lib/apache_log_report/data_cruncher.rb +84 -0
- data/lib/apache_log_report/emitter.rb +50 -0
- data/lib/apache_log_report/log_parser.rb +87 -0
- data/lib/apache_log_report/options_parser.rb +86 -0
- data/lib/apache_log_report/templates/_output_table.html.erb +25 -0
- data/lib/apache_log_report/templates/template.html.erb +164 -0
- data/lib/apache_log_report/templates/template.org.erb +262 -0
- data/lib/apache_log_report/version.rb +1 -1
- data/lib/apache_log_report.rb +5 -532
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f35ffbe99784dc77cbd61bb7b949237b9b380448b4829e442f0ad17513adc7b7
|
4
|
+
data.tar.gz: 1d5030f64425293895b4b1fc84199178971bd34bde0d9b8450e868ec1d427cc3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3eed1f80dec99429d5a3f25a99fd6e7ceca28547df852d97422a7cdb11b5d045592eb1c7e15443aca6c38744a7b62f59422049783f86d9b379039374c1b4ef09
|
7
|
+
data.tar.gz: 7de8e31cc0024d34191b6b70f8716bc4d31e037764e0f776d75a806490fe1f8cb170c8c989c570fb84f0329c0f12298d5a3bc39b32caa3cdcd826775c3ceaaae
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
apache_log_report (
|
4
|
+
apache_log_report (1.1.0)
|
5
5
|
apache_log-parser
|
6
6
|
browser
|
7
7
|
sqlite3
|
@@ -11,12 +11,12 @@ GEM
|
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
13
|
apache_log-parser (3.1.2)
|
14
|
-
browser (5.
|
14
|
+
browser (5.3.1)
|
15
15
|
rake (12.3.3)
|
16
16
|
sqlite3 (1.4.2)
|
17
|
-
terminal-table (
|
18
|
-
unicode-display_width (
|
19
|
-
unicode-display_width (1.
|
17
|
+
terminal-table (3.0.2)
|
18
|
+
unicode-display_width (>= 1.1.1, < 3)
|
19
|
+
unicode-display_width (2.1.0)
|
20
20
|
|
21
21
|
PLATFORMS
|
22
22
|
ruby
|
@@ -26,4 +26,4 @@ DEPENDENCIES
|
|
26
26
|
rake (~> 12.0)
|
27
27
|
|
28
28
|
BUNDLED WITH
|
29
|
-
2.
|
29
|
+
2.2.29
|
data/exe/apache_log_report
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
require 'apache_log_report.rb'
|
4
4
|
|
5
|
+
|
5
6
|
#
|
6
7
|
# Parse Command Line Arguments
|
7
8
|
#
|
8
9
|
|
9
10
|
@command = ARGV.join(" ")
|
10
|
-
@options = ApacheLogReport.
|
11
|
+
@options = ApacheLogReport::OptionsParser.parse ARGV
|
11
12
|
@log_file = ARGV[0]
|
12
13
|
|
13
14
|
if @log_file and not File.exist? @log_file
|
@@ -20,12 +21,23 @@ end
|
|
20
21
|
#
|
21
22
|
|
22
23
|
@started_at = Time.now
|
23
|
-
|
24
|
-
ApacheLogReport.
|
24
|
+
|
25
|
+
@db = ApacheLogReport::LogParser.parse @log_file
|
26
|
+
@data = ApacheLogReport::DataCruncher.crunch @db, @options
|
27
|
+
|
25
28
|
@ended_at = Time.now
|
26
29
|
@duration = @ended_at - @started_at
|
27
30
|
|
31
|
+
@data = @data.merge({
|
32
|
+
command: @command,
|
33
|
+
log_file: @log_file,
|
34
|
+
started_at: @started_at,
|
35
|
+
ended_at: @ended_at,
|
36
|
+
duration: @duration
|
37
|
+
})
|
38
|
+
|
28
39
|
#
|
29
40
|
# Emit Output
|
30
41
|
#
|
31
|
-
|
42
|
+
|
43
|
+
puts ApacheLogReport::Emitter.emit @data, @options
|
@@ -0,0 +1,245 @@
|
|
1
|
+
module ApacheLogReport
|
2
|
+
|
3
|
+
#
|
4
|
+
# parse command line options
|
5
|
+
#
|
6
|
+
require 'optparse'
|
7
|
+
require 'optparse/date'
|
8
|
+
require 'apache_log_report/version'
|
9
|
+
|
10
|
+
# Parse the command line arguments of apache_log_report.
#
# options:: argument vector (typically ARGV). Recognised switches are removed
#           from it in place by OptionParser#parse!, so only positional
#           arguments (such as the log file name) remain afterwards.
#
# Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
# :only_crawlers, :prefix, :suffix and :code_export are always set;
# :from_date and :to_date (DateTime) are present only when given.
#
# The -v and -h switches print their text and terminate the process via exit.
def self.options_parse options
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: apache_log_report [options] [logfile]"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
      args[:from_date] = n
    end

    opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
      args[:to_date] = n
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    # -c belongs to --code-export (defined below). Registering -c here as well
    # was silently overridden by that later definition, so the short form of
    # this switch never worked; it now has a letter of its own.
    opts.on("-C", "--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:prefix] = n
    end

    opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:suffix] = n
    end

    opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
      args[:code_export] = n
    end

    opts.on("-v", "--version", "Prints version information") do
      puts "apache_log_report version #{ApacheLogReport::VERSION}"
      puts "Copyright (C) 2020 Adolfo Villafiorita"
      puts "Distributed under the terms of the MIT license"
      puts ""
      puts "Written by Adolfo Villafiorita"
      exit
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      puts "This is version #{ApacheLogReport::VERSION}"
      exit
    end
  end

  opt_parser.parse!(options)

  # Values given on the command line win over the defaults.
  {
    limit: limit,
    ignore_crawlers: false,
    no_selfpoll: false,
    only_crawlers: false,
    prefix: "",
    suffix: "",
    code_export: "both"
  }.merge(args)
end
|
81
|
+
|
82
|
+
#
|
83
|
+
# parse an Apache log file and return a SQLite3 DB
|
84
|
+
#
|
85
|
+
require 'apache_log/parser'
|
86
|
+
require 'sqlite3'
|
87
|
+
require 'browser'
|
88
|
+
|
89
|
+
# Parse an Apache log and load it into an in-memory SQLite3 database.
#
# filename:: path of the log file; when nil the log is read from ARGF.
# options::  Hash; :format is handed to ApacheLog::Parser and defaults to
#            'combined'.
#
# Lines that cannot be parsed or stored are reported on STDERR and skipped.
#
# Returns the SQLite3::Database holding one LogLine row per accepted line.
def self.parse filename, options = {}
  # Stream the input instead of loading every line into memory first.
  lines = filename ? File.foreach(filename) : ARGF.each_line

  columns = %w[
    datetime ip user unique_visitor method path extension status size
    referer user_agent bot browser browser_version platform platform_version
  ]

  db = SQLite3::Database.new ":memory:"
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                datetime TEXT,
                ip TEXT,
                user TEXT,
                unique_visitor TEXT,
                method TEXT,
                path TEXT,
                extension TEXT,
                status TEXT,
                size INTEGER,
                referer TEXT,
                user_agent TEXT,
                bot INTEGER,
                browser TEXT,
                browser_version TEXT,
                platform TEXT,
                platform_version TEXT)"

  placeholders = (["?"] * columns.size).join(", ")
  ins = db.prepare("insert into LogLine (#{columns.join(', ')}) values (#{placeholders})")

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  begin
    # One transaction for the whole load rather than one implicit
    # transaction per inserted row.
    db.transaction do
      lines.each do |line|
        begin
          hash = parser.parse line

          # Log formats without a user agent yield nil here; treat it as an
          # empty string so the line is kept instead of being rejected.
          user_agent = hash[:user_agent].to_s
          path = hash[:request][:path]
          ua = Browser.new(user_agent, accept_language: "en-us")

          ins.execute(
            hash[:datetime].iso8601,
            hash[:remote_host],
            hash[:user],
            # a visitor is identified by day + address + user agent
            "#{hash[:datetime].strftime('%Y-%m-%d')} #{hash[:remote_host]} #{user_agent}",
            hash[:request][:method],
            path,
            # the extension is that of the resource, without query string or fragment
            (path ? File.extname(path[/\A[^?#]*/]) : ""),
            hash[:status],
            hash[:size].to_i,
            hash[:referer],
            hash[:user_agent],
            ua.bot? ? 1 : 0,
            (ua.name || ""),
            (ua.version || ""),
            (ua.platform.name || ""),
            (ua.platform.version || "")
          )
        rescue StandardError
          # best effort: report the offending line and go on with the next one
          STDERR.puts "Apache Log parser error: could not parse #{line}"
        end
      end
    end
  ensure
    ins.close
  end

  db
end
|
163
|
+
|
164
|
+
#
|
165
|
+
# take a sqlite3 database and analyze data
|
166
|
+
#
|
167
|
+
# Run the report queries against the LogLine table of +db+.
#
# db::      object responding to execute(sql) and returning an Array of rows.
# options:: Hash; the keys read are :from_date, :to_date (objects responding
#           to strftime, or date strings), :only_crawlers, :ignore_crawlers,
#           :no_selfpoll (:no_selfpolls is accepted too) and :limit
#           (defaults to 30).
#
# Every result is stored in an instance variable of the receiver
# (@first_day, @total_hits, @statuses_by_day, ...). The return value is the
# result of the last query (@referers).
def self.analyze_data db, options = {}
  limit = Integer(options[:limit] || 30)

  # date(datetime) yields YYYY-MM-DD: compare it with a value in the same
  # shape, otherwise the boundary day of --begin is left out.
  sql_date = lambda do |value|
    (value.respond_to?(:strftime) ? value : Date.parse(value.to_s)).strftime("%Y-%m-%d")
  end

  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{sql_date.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{sql_date.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    # the option parser stores this flag as :no_selfpoll
    ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  mega = 1024 * 1024
  giga = mega * 1024
  tera = giga * 1024

  # in alternative to sum(size)
  human_readable_size = <<~EOS
    CASE
    WHEN sum(size) < 1024 THEN sum(size) || ' B'
    WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
    WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
    WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
    WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
    END AS size
  EOS

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"

  # an empty log has no first/last day: report a span of 0 days
  first = @first_day.dig(0, 0)
  last = @last_day.dig(0, 0)
  @total_days = first && last ? (Date.parse(last) - Date.parse(first)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @reasonable_requests_exts = %w[.html .css .js .jpg .svg .png .woff .xml .ttf .ico .pdf .htm .txt .org].map { |x|
    "extension != '#{x}'"
  }.join " and "

  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # One row per day: [day, 2xx, 3xx, 4xx]. A class with no hits on a day
  # counts 0, so every count stays in its own column.
  per_day = Hash.new { |table, day| table[day] = [0, 0, 0] }
  [@by_day_2xx, @by_day_3xx, @by_day_4xx].each_with_index do |rows, column|
    rows.each { |day, count| per_day[day][column] = count }
  end
  @statuses_by_day = per_day.sort_by { |day, _| day.to_s }.map { |day, counts| [day, *counts] }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
|
241
|
+
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module ApacheLogReport
  module DataCruncher
    # Number of rows kept by the "top N" queries when options[:limit] is not given.
    DEFAULT_LIMIT = 30

    # Extensions excluded from the :attacks query: a 404 on a path with any
    # other extension is listed there.
    REASONABLE_EXTENSIONS = %w[
      .html .css .js .jpg .svg .png .woff .xml .ttf .ico .pdf .htm .txt .org
    ].freeze

    #
    # take a sqlite3 database and analyze data
    #
    # db::      object responding to execute(sql) and returning an Array of
    #           rows; the queries read the LogLine table.
    # options:: Hash; the keys read are :from_date, :to_date (objects
    #           responding to strftime, or date strings), :only_crawlers,
    #           :ignore_crawlers, :no_selfpoll (:no_selfpolls is accepted
    #           too) and :limit (defaults to DEFAULT_LIMIT).
    #
    # Returns a Hash whose keys are the names of the computed data
    # (:first_day, :total_hits, :statuses_by_day, :referers, ...) and whose
    # values are the query results.
    #
    def self.crunch db, options = {}
      limit = Integer(options[:limit] || DEFAULT_LIMIT)
      filter = filter_clause(options)
      human_readable_size = human_readable_size_sql

      data = {}

      data[:first_day] = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      data[:last_day] = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
      data[:log_size] = db.execute "SELECT count(datetime) from LogLine"
      data[:crawlers_size] = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      data[:selfpolls_size] = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      data[:total_hits] = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      data[:total_unique_visitors] = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
      data[:total_size] = db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"
      data[:total_days] = days_between(data[:first_day], data[:last_day])

      data[:daily_distribution] = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
      data[:time_distribution] = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
      data[:most_requested_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:most_requested_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      data[:reasonable_requests_exts] = REASONABLE_EXTENSIONS.map { |ext| "extension != '#{ext}'" }.join(" and ")

      data[:attacks] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{data[:reasonable_requests_exts]}) group by path order by count(path) desc limit #{limit}"
      data[:statuses] = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      data[:by_day_4xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
      data[:by_day_3xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
      data[:by_day_2xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"

      data[:statuses_by_day] = statuses_by_day(data[:by_day_2xx], data[:by_day_3xx], data[:by_day_4xx])

      data[:browsers] = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by browser order by count(browser) desc"
      data[:platforms] = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by platform order by count(platform) desc"
      data[:ips] = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"
      data[:referers] = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      data
    end

    #
    # generate the where clause corresponding to the command line options to filter data
    #
    def self.filter_clause options
      [
        (options[:from_date] ? "date(datetime) >= '#{sql_date(options[:from_date])}'" : nil),
        (options[:to_date] ? "date(datetime) <= '#{sql_date(options[:to_date])}'" : nil),
        (options[:only_crawlers] ? "bot == 1" : nil),
        (options[:ignore_crawlers] ? "bot == 0" : nil),
        # the option parser stores this flag as :no_selfpoll
        ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "
    end
    private_class_method :filter_clause

    # Format a date for comparison with SQLite's date(datetime), which yields
    # YYYY-MM-DD. Comparing with a full timestamp would leave out the
    # boundary day of --begin.
    def self.sql_date value
      (value.respond_to?(:strftime) ? value : Date.parse(value.to_s)).strftime("%Y-%m-%d")
    end
    private_class_method :sql_date

    # SQL expression (aliased as size) rendering sum(size) with a B/KB/MB/GB/TB
    # unit; used in alternative to sum(size).
    def self.human_readable_size_sql
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      <<~EOS
        CASE
        WHEN sum(size) < 1024 THEN sum(size) || ' B'
        WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
        WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
        WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size
      EOS
    end
    private_class_method :human_readable_size_sql

    # Days between the first and the last log entry; 0 when the log is empty.
    def self.days_between first_day, last_day
      first = first_day.dig(0, 0)
      last = last_day.dig(0, 0)
      return 0 unless first && last

      (Date.parse(last) - Date.parse(first)).to_i
    end
    private_class_method :days_between

    # Merge the per-day counts into rows [day, 2xx, 3xx, 4xx], ordered by day.
    # A class with no hits on a day counts 0, so that every count stays in
    # its own column.
    def self.statuses_by_day by_day_2xx, by_day_3xx, by_day_4xx
      per_day = Hash.new { |table, day| table[day] = [0, 0, 0] }
      [by_day_2xx, by_day_3xx, by_day_4xx].each_with_index do |rows, column|
        rows.each { |day, count| per_day[day][column] = count }
      end
      per_day.sort_by { |day, _| day.to_s }.map { |day, counts| [day, *counts] }
    end
    private_class_method :statuses_by_day
  end
end
|
84
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'terminal-table'
|
2
|
+
require 'erb'
|
3
|
+
require 'ostruct'
|
4
|
+
require 'byebug'
|
5
|
+
|
6
|
+
module ApacheLogReport
  module Emitter

    #
    # Emit Data
    #
    # Renders templates/template.<format>.erb (looked up next to this file)
    # and prints the result on standard output.
    #
    # data::    Hash made available to the template as @data.
    # options:: Hash made available to the template as @options; :prefix,
    #           :suffix and :code_export are also exposed as @prefix, @suffix
    #           and @export. :output_format selects the template and
    #           defaults to "org".
    #
    # Returns nil (the value of puts).
    # Raises ArgumentError when there is no template for the requested format.
    #
    def self.emit data = {}, options = {}
      @prefix = options[:prefix]
      @suffix = options[:suffix]
      @export = options[:code_export]
      @mode = options[:output_format] || "org"

      # for the ERB binding
      @data = data
      @options = options

      # determine the main template to read
      @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")

      # The format comes from the command line and ends up in a file name:
      # accept plain words only, so that it cannot point outside templates/,
      # and fail with a readable message when the template does not exist.
      unless @mode.to_s.match?(/\A\w+\z/) && File.file?(@template)
        raise ArgumentError, "unknown output format #{@mode.inspect}"
      end

      erb_template = File.read @template

      output = ERB.new(erb_template).result(binding)
      puts output
    end

    # The two helpers below are singleton methods, which a bare `private`
    # keyword does not affect: they are (and always were) public.
    # NOTE(review): presumably they are invoked from the ERB templates through
    # the binding created in emit - confirm before restricting visibility.

    # Format a table preceded by a "#+NAME:" line.
    #
    # name::     label written after "#+NAME: ".
    # headings:: Array of column headings.
    # rows::     Array of rows.
    #
    # Returns the name line and the table rendered by Terminal::Table, as a
    # single String.
    def self.output_table name, headings, rows
      name_line = "#+NAME: #{name}"
      table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }

      name_line + "\n" + table.to_s
    end

    # Render the partial templates/_<template>.html.erb.
    #
    # template:: name of the partial, without the leading underscore and the
    #            extension.
    # vars::     Hash of values; each key can be used as a bare name inside
    #            the partial (they are the fields of an OpenStruct the
    #            partial is evaluated in).
    #
    # Returns the rendered String.
    def self.render(template, vars)
      @template = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
      erb_template = File.read @template
      ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
    end

  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'apache_log/parser'
|
2
|
+
require 'sqlite3'
|
3
|
+
require 'browser'
|
4
|
+
|
5
|
+
module ApacheLogReport
  module LogParser
    # Columns of the LogLine table filled by parse, in insertion order
    # (the id column is assigned by SQLite).
    COLUMNS = %w[
      datetime ip user unique_visitor method path extension status size
      referer user_agent bot browser browser_version platform platform_version
    ].freeze

    CREATE_TABLE_SQL = <<~SQL
      CREATE TABLE IF NOT EXISTS LogLine(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        datetime TEXT,
        ip TEXT,
        user TEXT,
        unique_visitor TEXT,
        method TEXT,
        path TEXT,
        extension TEXT,
        status TEXT,
        size INTEGER,
        referer TEXT,
        user_agent TEXT,
        bot INTEGER,
        browser TEXT,
        browser_version TEXT,
        platform TEXT,
        platform_version TEXT)
    SQL

    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # filename:: path of the log file; when nil the log is read from ARGF.
    # options::  Hash; :format is handed to ApacheLog::Parser and defaults
    #            to 'combined'.
    #
    # Lines that cannot be parsed or stored are reported on STDERR and
    # skipped.
    #
    # Returns the in-memory SQLite3::Database holding one LogLine row per
    # accepted line.
    #
    def self.parse filename, options = {}
      # Stream the input instead of loading every line into memory first.
      lines = filename ? File.foreach(filename) : ARGF.each_line

      db = SQLite3::Database.new ":memory:"
      db.execute CREATE_TABLE_SQL

      placeholders = (["?"] * COLUMNS.size).join(", ")
      ins = db.prepare("insert into LogLine (#{COLUMNS.join(', ')}) values (#{placeholders})")

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      begin
        # One transaction for the whole load rather than one implicit
        # transaction per inserted row.
        db.transaction do
          lines.each do |line|
            begin
              ins.execute(*row_for(parser.parse(line)))
            rescue StandardError
              # best effort: report the offending line and go on with the next one
              STDERR.puts "Apache Log parser error: could not parse #{line}"
            end
          end
        end
      ensure
        ins.close
      end

      db
    end

    # Build the values of one LogLine row, ordered as COLUMNS, from the Hash
    # returned by the log parser.
    def self.row_for hash
      # Log formats without a user agent yield nil here; treat it as an empty
      # string so the line is kept instead of being rejected.
      user_agent = hash[:user_agent].to_s
      path = hash[:request][:path]
      ua = Browser.new(user_agent, accept_language: "en-us")

      [
        hash[:datetime].iso8601,
        hash[:remote_host],
        hash[:user],
        # a visitor is identified by day + address + user agent
        "#{hash[:datetime].strftime('%Y-%m-%d')} #{hash[:remote_host]} #{user_agent}",
        hash[:request][:method],
        path,
        # the extension is that of the resource, without query string or fragment
        (path ? File.extname(path[/\A[^?#]*/]) : ""),
        hash[:status],
        hash[:size].to_i,
        hash[:referer],
        hash[:user_agent],
        ua.bot? ? 1 : 0,
        (ua.name || ""),
        (ua.version || ""),
        (ua.platform.name || ""),
        (ua.platform.version || "")
      ]
    end
    private_class_method :row_for

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'optparse/date'
|
3
|
+
require 'apache_log_report/version'
|
4
|
+
|
5
|
+
module ApacheLogReport
  module OptionsParser
    #
    # parse command line options
    #
    # options:: argument vector (typically ARGV). Recognised switches are
    #           removed from it in place by OptionParser#parse!, so only
    #           positional arguments (such as the log file name) remain
    #           afterwards.
    #
    # Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
    # :only_crawlers, :prefix, :suffix and :code_export are always set;
    # :from_date, :to_date (DateTime) and :output_format are present only
    # when given.
    #
    # The -v and -h switches print their text and terminate the process via
    # exit.
    #
    def self.parse options
      limit = 30
      args = {}

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: apache_log_report [options] [logfile]"

        opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
          args[:limit] = n
        end

        opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
          args[:from_date] = n
        end

        opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
          args[:to_date] = n
        end

        opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
          args[:ignore_crawlers] = true
        end

        opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
          args[:no_selfpoll] = true
        end

        # -c belongs to --code-export (defined below). Registering -c here as
        # well was silently overridden by that later definition, so the short
        # form of this switch never worked; it now has a letter of its own.
        opts.on("-C", "--only-crawlers", "Perform analysis on crawlers only") do
          args[:only_crawlers] = true
        end

        opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:prefix] = n
        end

        opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:suffix] = n
        end

        opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
          args[:code_export] = n
        end

        opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org. Defaults to org mode") do |n|
          args[:output_format] = n
        end

        opts.on("-v", "--version", "Prints version information") do
          puts "apache_log_report version #{ApacheLogReport::VERSION}"
          puts "Copyright (C) 2020 Adolfo Villafiorita"
          puts "Distributed under the terms of the MIT license"
          puts ""
          puts "Written by Adolfo Villafiorita"
          exit
        end

        opts.on("-h", "--help", "Prints this help") do
          puts opts
          puts "This is version #{ApacheLogReport::VERSION}"
          exit
        end
      end

      opt_parser.parse!(options)

      # Values given on the command line win over the defaults.
      {
        limit: limit,
        ignore_crawlers: false,
        no_selfpoll: false,
        only_crawlers: false,
        prefix: "",
        suffix: "",
        code_export: "both"
      }.merge(args)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
<%
  # Partial rendering one HTML table. It reads three names from its binding:
  # title (used for the id and class of the table), header (column headings)
  # and rows (one array of cells per row).
  #
  # NOTE(review): the cells presumably carry values taken from the access log
  # (paths, referers, user agents), i.e. text chosen by whoever sent the
  # requests - confirm against the caller. Headings and cells are therefore
  # HTML-escaped before being written.

  # Turn a label into a token usable as an HTML id/class: lower-cased, with
  # each run of spaces replaced by a dash. nil and non-String labels are
  # converted with to_s.
  def slugify string
    string.to_s.downcase.gsub(/ +/, '-')
  end
%>

<table id="<%= ERB::Util.html_escape(slugify(title)) %>" class="<%= ERB::Util.html_escape(slugify(title)) %>">
  <thead>
    <tr>
      <% header.each do |heading| %>
        <th class="<%= ERB::Util.html_escape(slugify(heading)) %>"><%= ERB::Util.html_escape(heading) %></th>
      <% end %>
    </tr>
  </thead>
  <tbody>
    <% rows.each do |row| %>
      <tr>
        <% row.each_with_index do |cell, i| %>
          <td class="<%= ERB::Util.html_escape(slugify(header[i])) %>"><%= ERB::Util.html_escape(cell) %></td>
        <% end %>
      </tr>
    <% end %>
  </tbody>
</table>
|
25
|
+
|