apache_log_report 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,505 @@
1
- require 'apache_log_report/log_parser_sqlite3.rb'
2
- require 'apache_log_report/option_parser.rb'
3
- require 'apache_log_report/version.rb'
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+
9
#
# Parse the command line switches contained in +options+ (an ARGV-like
# array of strings).
#
# Recognised switches are removed from +options+ in place (parse!), so
# only the positional arguments are left in the array afterwards.
#
# Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
# :only_crawlers, :prefix and :suffix are always set; :from_date and
# :to_date are present only when given on the command line.
#
# -h / --help prints the usage text and terminates the process.
#
def self.options_parse options
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: log-analyzer.rb [options] logfile"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |date|
      args[:from_date] = date
    end

    opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |date|
      args[:to_date] = date
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |prefix|
      args[:prefix] = prefix
    end

    opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |suffix|
      args[:suffix] = suffix
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end

  opt_parser.parse!(options)

  # defaults for everything that was not given on the command line
  args[:limit] ||= limit
  args[:ignore_crawlers] ||= false
  args[:no_selfpoll] ||= false
  args[:only_crawlers] ||= false
  args[:prefix] ||= ""
  # the key used to be misspelled :suffic, which left :suffix without a default
  args[:suffix] ||= ""

  args
end
65
+
66
+
67
+
68
+ #
69
+ # parse an Apache log file and return a SQLite3 DB
70
+ #
71
+ require 'apache_log/parser'
72
+ require 'sqlite3'
73
+ require 'browser'
74
+
75
#
# Read an Apache log (from +filename+, or from ARGF when +filename+ is
# nil), parse every line and store the result in a LogLine table of a
# freshly created in-memory SQLite3 database.
#
# options[:format] selects the log format handed to ApacheLog::Parser
# (defaults to 'combined').
#
# Lines for which the parser returns an empty hash are skipped.
#
# Returns the SQLite3::Database.
#
def self.parse filename, options = {}
  content = filename ? File.readlines(filename) : ARGF.readlines

  db = SQLite3::Database.new ":memory:"
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    datetime TEXT,
    ip TEXT,
    user TEXT,
    unique_visitor TEXT,
    method TEXT,
    path TEXT,
    extension TEXT,
    status TEXT,
    size INTEGER,
    referer TEXT,
    user_agent TEXT,
    bot INTEGER,
    browser TEXT,
    browser_version TEXT,
    platform TEXT,
    platform_version TEXT)"

  ins = db.prepare('insert into LogLine (
    datetime,
    ip,
    user,
    unique_visitor,
    method,
    path,
    extension,
    status,
    size,
    referer,
    user_agent,
    bot,
    browser,
    browser_version,
    platform,
    platform_version)
    values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  begin
    # each (not collect): the loop is run for its inserts only
    content.each do |line|
      entry = parser.parse line
      next if entry == {}

      request = entry[:request]
      ua = Browser.new(entry[:user_agent], accept_language: "en-us")

      ins.execute(
        entry[:datetime].iso8601,
        entry[:remote_host],
        entry[:user],
        # interpolation instead of +, so a missing user agent does not raise
        "#{entry[:remote_host]}#{entry[:user_agent]}",
        request[:method],
        request[:path],
        (request[:path] ? File.extname(request[:path]) : ""),
        entry[:status],
        entry[:size].to_i,
        entry[:referer],
        entry[:user_agent],
        ua.bot? ? 1 : 0,
        (ua.name || ""),
        (ua.version || ""),
        (ua.platform.name || ""),
        (ua.platform.version || "")
      )
    end
  ensure
    # release the prepared statement even when a line fails to import
    ins.close
  end

  db
end
148
+
149
+
150
# Placeholder: the body is empty, so the method ignores +ext+ and
# returns nil.
def self.reasonable_response_type(ext)
end
153
+
154
+ #
155
+ # take a sqlite3 database and analyze data
156
+ #
157
#
# Run all the report queries against +db+ (any object answering
# execute(sql) with an array of rows) and keep the results in instance
# variables, which emit reads when it builds the report.
#
# Recognised +options+:
#   :from_date, :to_date  - restrict to entries on or after / on or before
#                           that day (anything with strftime, or a string
#                           Date.parse understands)
#   :only_crawlers        - consider bot entries only
#   :ignore_crawlers      - discard bot entries
#   :no_selfpoll          - discard entries coming from ::1
#                           (:no_selfpolls is accepted as an alias)
#   :limit                - rows shown in the "top N" tables (default 30)
#
def self.analyze_data db, options = {}
  # Integer() keeps the interpolated LIMIT numeric; 30 mirrors the
  # command line default and avoids "limit " with nothing after it
  limit = Integer(options[:limit] || 30)

  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  # date(datetime) yields YYYY-MM-DD, so compare against a plain day:
  # interpolating a full timestamp made ">=" exclude the boundary day
  iso_day = lambda do |value|
    (value.respond_to?(:strftime) ? value : Date.parse(value.to_s)).strftime("%Y-%m-%d")
  end

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{iso_day.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{iso_day.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    # the option parser stores this flag as :no_selfpoll
    ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT sum(size) from LogLine where #{@filter}"

  # an empty log has no first/last entry: report zero days instead of crashing
  first_seen = @first_day.dig(0, 0)
  last_seen = @last_day.dig(0, 0)
  @total_days = first_seen && last_seen ? (Date.parse(last_seen) - Date.parse(first_seen)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), sum(size) from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @reasonable_requests_exts = %w[.html .css .js .jpg .svg .png .woff .xml .ttf .ico .pdf .htm .txt .org].map { |x|
    "extension == '#{x}'"
  }.join " or "

  # NOTE(review): a "possible attack" is taken to be a 404 on a path whose
  # extension is NOT one of the reasonable ones; the previous query selected
  # the reasonable ones, i.e. a subset of @missed_resources - confirm intent
  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and not (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # one row per day: [day, 2xx, 3xx, 4xx]. Every class gets its own slot
  # (0 when the day has no such hits), so counts cannot slide into the
  # wrong column when a class is missing on a given day
  per_day = Hash.new { |table, day| table[day] = [0, 0, 0] }
  [@by_day_2xx, @by_day_3xx, @by_day_4xx].each_with_index do |rows, slot|
    rows.each { |day, count| per_day[day][slot] = count }
  end
  @statuses_by_day = per_day.sort_by { |day, _counts| day.to_s }.map { |day, counts| [day, *counts] }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), sum(size) from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
218
+
219
+
220
+ #
221
+ # Emit Data
222
+ #
223
+
224
+ require 'terminal-table'
225
+
226
#
# Build a Terminal::Table from +headings+ and +rows+ (drawn with "-" as
# horizontal border and "|" as intersection) and return it as a string,
# preceded by a "#+NAME: <name>" line.
#
def self.output_table name, headings, rows
  rendered = Terminal::Table.new(
    headings: headings,
    rows: rows,
    style: { border_x: "-", border_i: "|" }
  )

  # right-alignment of the numeric columns stays disabled:
  #   (2..headings.size).each { |i| rendered.align_column(i, :right) }

  "#+NAME: #{name}\n#{rendered}"
end
236
+
237
#
# Build the org-mode report from the instance variables filled in by
# analyze_data and return it as a String.
#
# options    - Hash of command line options; :prefix and :suffix are added
#              to the names of the generated plot files, the other
#              entries are echoed in the "Command Invocation" section
# command    - the command line, reported verbatim
# log_file   - name of the analyzed file (nil is reported as "stdin")
# started_at - start of the analysis, reported via to_s
# ended_at   - end of the analysis, reported via to_s
# duration   - elapsed time in seconds
#
def self.emit options = {}, command, log_file, started_at, ended_at, duration
  # was assigned to @prefx, so the prefix never reached the plot file names
  @prefix = options[:prefix]
  @suffix = options[:suffix]

  seconds = duration.to_f
  # a zero duration would otherwise raise or print Inf/NaN
  lines_per_second = seconds.positive? ? @log_size[0][0] / seconds : 0.0
  date_filtered = !options[:from_date].nil? || !options[:to_date].nil?

  <<EOS
#+TITLE: Apache Log Analysis: #{log_file}
#+DATE: <#{Date.today}>
#+STARTUP: showall
#+OPTIONS: ^:{}
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
#+OPTIONS: html-style:nil

* Summary

| Hits | #{"%10d" % @total_hits[0][0]} |
| Unique Visitors | #{"%10d" % @total_unique_visitors[0][0]} |
| Tx | #{"%10d" % @total_size[0][0].to_i} |
| Days | #{"%10d" % @total_days} |

* Daily Distribution

#{ output_table "daily_distribution", ["Day", "Hits", "Visits", "Size"], @daily_distribution }

#+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports both :file #{@prefix}daily#{@suffix}.svg
reset
set grid ytics linestyle 0
set grid xtics linestyle 0
set terminal svg size 1200,800 fname 'Arial'

set xdata time
set timefmt "%Y-%m-%d"
set format x "%a, %b %d"
set xtics rotate by 60 right

set title "Hits and Visitors"
set xlabel "Date"
set ylabel "Hits"
set y2label "Visits"

set style fill transparent solid 0.2 noborder

plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
#+END_SRC


* Time Distribution

#{ output_table "time_distribution", ["Hour", "Hits", "Visits", "Size"], @time_distribution }


#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{@prefix}time#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Hits and Visitors"
set xlabel "Date"
set ylabel "Hits and Visits"

set style fill solid 0.25
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
#+END_SRC

#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{@prefix}time-traffic#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Traffic"
set xlabel "Date"
set ylabel "Traffic"

set style fill solid 0.50
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#00AA00" title "Traffic", \\
data using ($0):($2 + 10):2 with labels title "" textcolor rgb("#00AA00")
#+END_SRC

* Most Requested Pages

#{ output_table "most_requested_pages", ["Path", "Hits", "Visits", "Size"], @most_requested_pages }

* Most Requested URIs

#{ output_table "most_requested_resources", ["Path", "Hits", "Visits", "Size"], @most_requested_resources }

* 404s on HTML files

#{ output_table "pages_404", ["Path", "Hits", "Visitors"], @missed_pages }

* 404s on other resources

#{ output_table "resources_404", ["Path", "Hits", "Visitors"], @missed_resources }

* Possible Attacks

#{ output_table "Attacks", ["Path", "Hits", "Visitors"], @attacks }

* Statuses

#{ output_table "statuses", ["Status", "Count"], @statuses }

#+BEGIN_SRC gnuplot :var data = statuses :results output :exports both :file #{@prefix}statuses#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* Daily Statuses

#{ output_table "daily_statuses", ["Day", "2xx", "3xx", "4xx"], @statuses_by_day }

#+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports both :file #{@prefix}daily-statuses#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0

set title "Daily Statuses"
set xlabel "Date"
set ylabel "Number of Hits"
set xtics rotate by 60 right

set style fill solid 0.25
set boxwidth 0.6

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#00AA00" title "2xx", \\
data using 3 lc rgb "#0000CC" title "3xx", \\
data using 4 lc rgb "#CC0000" title "4xx", \\
data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#00AA00"), \\
data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#CC0000")
#+END_SRC

* Browsers

#{ output_table "browsers", ["Browser", "Hits", "Visitors", "Size"], @browsers }

#+BEGIN_SRC gnuplot :var data = browsers :results output :exports both :file #{@prefix}browser#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* Platforms

#{ output_table "platforms", ["Platform", "Hits", "Visitors", "Size"], @platforms }

#+BEGIN_SRC gnuplot :var data = platforms :results output :exports both :file #{@prefix}platforms#{@suffix}.svg
reset
set grid ytics linestyle 0
set terminal svg size 1200,800 fname 'Arial' fsize 10

set style fill solid 0.25
set boxwidth 0.6

plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
#+END_SRC

* IPs

#{ output_table "ips", ["IPs", "Hits", "Visitors", "Size"], @ips }


* Referers

#{ output_table "referers", ["Referers", "Hits", "Visitors", "Size"], @referers }

#+BEGIN_SRC gnuplot :var data = referers :results output :exports both :file #{@prefix}referers#{@suffix}.svg
reset
set terminal svg size 1200,800 fname 'Arial' fsize 10

set grid ytics linestyle 0
set grid xtics linestyle 0

set title "Referers"
set xlabel "Date"
set xtics rotate by 60 right
set ylabel "Hits and Visits"

set style fill solid 0.45
set boxwidth 0.7

set style data histograms
set style histogram clustered gap 1

plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
data using 3 lc rgb "#0AAAA0" title "Visits", \\
data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
#+END_SRC

* Command Invocation and Performance

** Command Invocation

#+BEGIN_EXAMPLE shell
#{command}
#+END_EXAMPLE

| Input file | #{"%-50s" % (log_file || "stdin")} |
| Ignore crawlers | #{"%-50s" % options[:ignore_crawlers]} |
| Only crawlers | #{"%-50s" % options[:only_crawlers]} |
| No selfpoll | #{"%-50s" % options[:no_selfpoll]} |
| Filter by date | #{"%-50s" % date_filtered} |
| Prefix | #{"%-50s" % @prefix} |
| Suffix | #{"%-50s" % @suffix} |

** Log Structure

| Log size | #{"%10d" % @log_size[0][0]} |
| Self poll entries | #{"%10d" % @selfpolls_size[0][0]} |
| Crawlers | #{"%10d" % @crawlers_size[0][0]} |
| Entries considered | #{"%10d" % @total_hits[0][0]} |

** Performance

| Analysis started at | #{started_at.to_s} |
| Analysis ended at | #{ended_at.to_s} |
| Duration (sec) | #{"%5.3f" % seconds} |
| Duration (min) | #{"%5.3f" % (seconds / 60)} |
| Log size | #{"%9d" % @log_size[0][0]} |
| Lines/sec | #{"%6.2f" % lines_per_second} |

* Local Variables :noexport:
# Local Variables:
# org-confirm-babel-evaluate: nil
# org-display-inline-images: t
# end:
EOS
end
503
+ end
504
+
4
505