apache_log_report 0.9.0 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,510 @@
1
- require 'apache_log_report/log_parser_sqlite3.rb'
2
- require 'apache_log_report/option_parser.rb'
3
- require 'apache_log_report/version.rb'
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+
9
# Parse command line switches into a settings hash.
#
# options:: argument vector (typically ARGV). Recognized switches are removed
#           from it in place by OptionParser#parse!, leaving the positional
#           arguments (the log file) behind.
#
# Returns a Hash that always holds :limit, :ignore_crawlers, :no_selfpoll,
# :only_crawlers, :prefix, :suffix and :code_export, plus :from_date and
# :to_date (DateTime) when the corresponding switches were given.
#
# "-h"/"--help" prints the usage text and exits the process.
def self.options_parse(options)
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: log-analyzer.rb [options] logfile"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
      args[:from_date] = n
    end

    opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
      args[:to_date] = n
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    # "-c" used to be declared both here and for --code-export. A short
    # switch can only be bound to one option, so --only-crawlers gets its
    # own letter and "-c" stays with --code-export (declared last).
    opts.on("-o", "--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:prefix] = n
    end

    opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:suffix] = n
    end

    opts.on("-c", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
      args[:code_export] = n
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end

  opt_parser.parse!(options)

  # Values given on the command line win over the defaults.
  defaults = {
    limit: limit,
    ignore_crawlers: false,
    no_selfpoll: false,
    only_crawlers: false,
    prefix: "",
    suffix: "",
    code_export: "both"
  }

  defaults.merge(args)
end
70
+
71
+
72
+
73
+ #
74
+ # parse an Apache log file and return a SQLite3 DB
75
+ #
76
+ require 'apache_log/parser'
77
+ require 'sqlite3'
78
+ require 'browser'
79
+
80
# Parse an Apache log and load it into a fresh in-memory SQLite3 database.
#
# filename:: path of the log file; when nil/false the lines are read from ARGF.
# options::  Hash; :format selects the ApacheLog::Parser format
#            (defaults to 'combined').
#
# Returns the SQLite3::Database holding one LogLine row per parsed line.
# Lines that cannot be parsed or stored are reported on STDERR and skipped.
def self.parse(filename, options = {})
  content = filename ? File.readlines(filename) : ARGF.readlines

  db = SQLite3::Database.new ":memory:"
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    datetime TEXT,
    ip TEXT,
    user TEXT,
    unique_visitor TEXT,
    method TEXT,
    path TEXT,
    extension TEXT,
    status TEXT,
    size INTEGER,
    referer TEXT,
    user_agent TEXT,
    bot INTEGER,
    browser TEXT,
    browser_version TEXT,
    platform TEXT,
    platform_version TEXT)"

  ins = db.prepare('insert into LogLine (
    datetime,
    ip,
    user,
    unique_visitor,
    method,
    path,
    extension,
    status,
    size,
    referer,
    user_agent,
    bot,
    browser,
    browser_version,
    platform,
    platform_version)
    values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  begin
    content.each do |line|
      begin
        hash = parser.parse line

        ua = Browser.new(hash[:user_agent], accept_language: "en-us")
        ins.execute(
          hash[:datetime].iso8601,
          hash[:remote_host],
          hash[:user],
          # unique_visitor key: timestamp + host + user agent.
          # NOTE(review): the full timestamp is part of the key - confirm
          # that is the intended notion of a "unique visitor".
          hash[:datetime].iso8601 + " " + hash[:remote_host] + " " + hash[:user_agent],
          hash[:request][:method],
          hash[:request][:path],
          (hash[:request][:path] ? File.extname(hash[:request][:path]) : ""),
          hash[:status],
          hash[:size].to_i,
          hash[:referer],
          hash[:user_agent],
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || "")
        )
      rescue StandardError
        # Best effort: a malformed line must not abort the whole import.
        STDERR.puts "Apache Log parser error: could not parse #{line}"
      end
    end
  ensure
    # Release the prepared statement; the database itself stays open for
    # the caller.
    ins.close
  end

  db
end
154
+
155
+ #
156
+ # take a sqlite3 database and analyze data
157
+ #
158
# Run every report query against the LogLine table and keep the results in
# instance variables (read later when the report is emitted).
#
# db::      object answering #execute(sql) with an array of rows, e.g. the
#           SQLite3 database built from the log.
# options:: Hash of filters:
#           :from_date / :to_date  inclusive day bounds (Date/DateTime/Time or
#                                  a 'YYYY-MM-DD' string),
#           :only_crawlers, :ignore_crawlers, :no_selfpoll  row filters,
#           :limit  maximum rows of the "top N" tables (defaults to 30).
#
# Returns the rows of the last query (referers).
def self.analyze_data(db, options = {})
  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  # date(datetime) yields 'YYYY-MM-DD'. Comparing it with a full timestamp
  # string would drop the boundary day of --from-date, so the bounds are
  # reduced to the same day format first.
  sql_day = ->(value) { value.respond_to?(:strftime) ? value.strftime("%Y-%m-%d") : value.to_s }

  # The command line parser stores this flag as :no_selfpoll; the historical
  # :no_selfpolls spelling is still honoured.
  no_selfpoll = options[:no_selfpoll] || options[:no_selfpolls]

  # Without a limit the "top N" queries would be invalid SQL.
  limit = Integer(options[:limit] || 30)

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{sql_day.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{sql_day.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    (no_selfpoll ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT sum(size) from LogLine where #{@filter}"

  # An empty log has no first/last entry; report zero days instead of failing.
  first_seen = @first_day.dig(0, 0)
  last_seen = @last_day.dig(0, 0)
  @total_days = first_seen && last_seen ? (Date.parse(last_seen) - Date.parse(first_seen)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), sum(size) from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  # 404s on anything that is not one of these extensions are listed as
  # possible attacks.
  @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
    "extension != '#{x}'"
  }.join " and "

  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # One row per day: [day, 2xx, 3xx, 4xx]. A class without hits on a day is
  # reported as 0 so that counts never slide into a neighbouring column.
  counts_per_class = [@by_day_2xx, @by_day_3xx, @by_day_4xx].map { |rows| rows.to_h { |row| [row[0], row[1]] } }
  days = counts_per_class.flat_map(&:keys).uniq.sort
  @statuses_by_day = days.map { |day|
    [day, *counts_per_class.map { |counts| counts.fetch(day, 0) }]
  }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
219
+
220
+
221
+ #
222
+ # Emit Data
223
+ #
224
+
225
+ require 'terminal-table'
226
+
227
# Render +rows+ under +headings+ with Terminal::Table and put an org-mode
# "#+NAME:" line carrying +name+ in front of it.
#
# Returns the name line and the rendered table joined by a newline.
def self.output_table(name, headings, rows)
  rendered = Terminal::Table.new(
    headings: headings,
    rows: rows,
    style: { border_x: "-", border_i: "|" }
  )

  # Column alignment is currently switched off:
  #(2..headings.size).each do |i|
  #  rendered.align_column(i, :right)
  #end

  "#+NAME: #{name}\n#{rendered}"
end
237
+
238
# Build the org-mode report from the data gathered by the analysis step
# (read from the instance variables it left behind).
#
# options::    settings Hash; :prefix and :suffix are added to every plot file
#              name, :code_export fills the :exports header of the gnuplot
#              blocks, the remaining flags are echoed in the invocation table.
# command::    command line used to run the analysis (printed verbatim).
# log_file::   name of the analysed log, nil when read from stdin.
# started_at:: start of the analysis (printed with #to_s).
# ended_at::   end of the analysis (printed with #to_s).
# duration::   elapsed seconds (Numeric).
#
# Returns the complete report as a String.
def self.emit(options = {}, command, log_file, started_at, ended_at, duration)
  # Was assigned to a misspelled variable, which left @prefix nil and dropped
  # the prefix from every plot file name and from the invocation table.
  @prefix = options[:prefix]
  @suffix = options[:suffix]
  @export = options[:code_export]

  # sum(size) is NULL/nil when no row matches the filter.
  total_size = @total_size[0][0].to_i
  date_filtered = !options[:from_date].nil? || !options[:to_date].nil?
  # Guard against a zero duration (integer division would raise).
  lines_per_sec = duration.to_f > 0 ? @log_size[0][0] / duration.to_f : 0.0

  <<EOS
#+TITLE: Apache Log Analysis: #{log_file}
#+DATE: <#{Date.today}>
#+STARTUP: showall
#+OPTIONS: ^:{}
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
#+OPTIONS: html-style:nil

* Summary

| Hits | #{"%10d" % @total_hits[0][0]} |
| Unique Visitors | #{"%10d" % @total_unique_visitors[0][0]} |
| Tx | #{"%10d" % total_size} |
| Days | #{"%10d" % @total_days} |

* Daily Distribution

#{output_table("daily_distribution", ["Day", "Hits", "Visits", "Size"], @daily_distribution)}

#+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports #{@export} :file #{@prefix}daily#{@suffix}.svg
reset
set grid ytics linestyle 0
set grid xtics linestyle 0
set terminal svg size 1200,800 fname 'Arial'

set xdata time
set timefmt "%Y-%m-%d"
set format x "%a, %b %d"
set xtics rotate by 60 right

set title "Hits and Visitors"
set xlabel "Date"
set ylabel "Hits"
set y2label "Visits"
set y2tics

set style fill transparent solid 0.2 noborder

plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
#+END_SRC


* Time Distribution

#{output_table("time_distribution", ["Hour", "Hits", "Visits", "Size"], @time_distribution)}


#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports #{@export} :file #{@prefix}time#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Hits and Visitors"
set xlabel "Date"
set ylabel "Hits"
set y2label "Visitors"
set y2tics

set style fill solid 0.25
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
#+END_SRC

#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports #{@export} :file #{@prefix}time-traffic#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Traffic"
set xlabel "Date"
set ylabel "Traffic"

set style fill solid 0.50
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 4:xtic(1) lc rgb "#00AA00" title "Traffic", \\
data using ($0):($4 + 10):4 with labels title "" textcolor rgb("#00AA00")
#+END_SRC

* Most Requested Pages

#{output_table("most_requested_pages", ["Path", "Hits", "Visits", "Size"], @most_requested_pages)}

* Most Requested URIs

#{output_table("most_requested_resources", ["Path", "Hits", "Visits", "Size"], @most_requested_resources)}

* 404s on HTML files

#{output_table("pages_404", ["Path", "Hits", "Visitors"], @missed_pages)}

* 404s on other resources

#{output_table("resources_404", ["Path", "Hits", "Visitors"], @missed_resources)}

* Possible Attacks

#{output_table("attacks", ["Path", "Hits", "Visitors"], @attacks)}

* Statuses

#{output_table("statuses", ["Status", "Count"], @statuses)}

#+BEGIN_SRC gnuplot :var data = statuses :results output :exports #{@export} :file #{@prefix}statuses#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* Daily Statuses

#{output_table("daily_statuses", ["Status", "2xx", "3xx", "4xx"], @statuses_by_day)}

#+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports #{@export} :file #{@prefix}daily-statuses#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Daily Statuses"
set xlabel "Date"
set ylabel "Number of Hits"
set xtics rotate by 60 right

set style fill solid 0.25
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#00AA00" title "2xx", \\
data using 3 lc rgb "#0000CC" title "3xx", \\
data using 4 lc rgb "#AA0000" title "4xx", \\
data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#00AA00"), \\
data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#AA0000")
#+END_SRC

* Browsers

#{output_table("browsers", ["Browser", "Hits", "Visitors", "Size"], @browsers)}

#+BEGIN_SRC gnuplot :var data = browsers :results output :exports #{@export} :file #{@prefix}browser#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* Platforms

#{output_table("platforms", ["Platform", "Hits", "Visitors", "Size"], @platforms)}

#+BEGIN_SRC gnuplot :var data = platforms :results output :exports #{@export} :file #{@prefix}platforms#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* IPs

#{output_table("ips", ["IPs", "Hits", "Visitors", "Size"], @ips)}


* Referers

#{output_table("referers", ["Referers", "Hits", "Visitors", "Size"], @referers)}

#+BEGIN_SRC gnuplot :var data = referers :results output :exports #{@export} :file #{@prefix}referers#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0
set grid xtics linestyle 0

set title "Referers"
set xlabel "Date"
set xtics rotate by 60 right
set ylabel "Hits and Visits"

set style fill solid 0.45
set boxwidth 0.7

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
data using 3 lc rgb "#0AAAA0" title "Visits", \\
data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
#+END_SRC

* Command Invocation and Performance

** Command Invocation

#+BEGIN_EXAMPLE shell
#{command}
#+END_EXAMPLE

| Input file | #{"%-50s" % (log_file || "stdin")} |
| Ignore crawlers | #{"%-50s" % options[:ignore_crawlers]} |
| Only crawlers | #{"%-50s" % options[:only_crawlers]} |
| No selfpoll | #{"%-50s" % options[:no_selfpoll]} |
| Filter by date | #{"%-50s" % date_filtered} |
| Prefix | #{"%-50s" % @prefix} |
| Suffix | #{"%-50s" % @suffix} |

** Log Structure

| Log size | #{"%10d" % @log_size[0][0]} |
| Self poll entries | #{"%10d" % @selfpolls_size[0][0]} |
| Crawlers | #{"%10d" % @crawlers_size[0][0]} |
| Entries considered | #{"%10d" % @total_hits[0][0]} |

** Performance

| Analysis started at | #{started_at.to_s} |
| Analysis ended at | #{ended_at.to_s} |
| Duration (sec) | #{"%5.3d" % duration} |
| Duration (min) | #{"%5.3d" % (duration / 60)} |
| Log size | #{"%9d" % @log_size[0][0]} |
| Lines/sec | #{"%6.2f" % lines_per_sec} |

* Local Variables :noexport:
# Local Variables:
# org-confirm-babel-evaluate: nil
# org-display-inline-images: t
# end:
EOS
end
508
+ end
509
+
4
510