apache_log_report 0.9.0 → 0.9.1
This diff shows the changes between publicly released versions of the package, as they appear in the supported public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/apache_log_report.gemspec +2 -0
- data/exe/apache_log_report +16 -611
- data/lib/apache_log_report.rb +504 -3
- data/lib/apache_log_report/version.rb +1 -1
- metadata +29 -4
- data/lib/apache_log_report/log_parser_hash.rb +0 -49
- data/lib/apache_log_report/log_parser_sqlite3.rb +0 -99
- data/lib/apache_log_report/option_parser.rb +0 -63
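The headline change is structural: data/exe/apache_log_report shrinks from 627 lines to 32, with the report logic consolidated into data/lib/apache_log_report.rb (+504 lines). A quick way to confirm which version is active after upgrading, assuming the conventional VERSION constant defined in the bumped version.rb (the constant name is not shown in this diff):

    # Version check after `gem update apache_log_report`; the VERSION constant
    # name is assumed from the standard gem layout, not confirmed by this diff.
    require 'apache_log_report'

    puts ApacheLogReport::VERSION   # expected: "0.9.1"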
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a790d10e531dbe48d5bff951de0e2a3bc47188dcffd03d952597019012e76bed
+  data.tar.gz: 22ff24d2b7a292bd8ef58d0ca49d0f55aa3a12d9400568ba46da5f0280fed3b2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: abe3454d3679848d92f375f73b18efee2c090d83ba6d069146e3773542dccaa2c924b9b9813bc1311ad71ad82acb970d570a3f08674b4ffca00f6efbc80fde1a
+  data.tar.gz: 60e70449964fb3460eec9a8c0ac6f155959ea697f694a65755e79beb13fe25a2199ebb8b17e01f941c022035791436f062fb909617c0eae785fd28f400a07fa0
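The digests above cover the metadata.gz and data.tar.gz archives packaged inside the .gem file, not the .gem file itself. A minimal Ruby sketch for checking the new SHA256 of data.tar.gz against a locally fetched copy (the local filename is an assumption; adjust it to wherever `gem fetch apache_log_report -v 0.9.1` placed the file):

    # Verify the SHA256 of data.tar.gz inside the fetched gem against
    # checksums.yaml above. A .gem file is a plain tar archive.
    require 'digest'
    require 'rubygems/package'

    expected = "22ff24d2b7a292bd8ef58d0ca49d0f55aa3a12d9400568ba46da5f0280fed3b2"
    actual   = nil

    File.open("apache_log_report-0.9.1.gem", "rb") do |gem_file|
      Gem::Package::TarReader.new(gem_file).each do |entry|
        actual = Digest::SHA256.hexdigest(entry.read) if entry.full_name == "data.tar.gz"
      end
    end

    puts(actual == expected ? "data.tar.gz digest matches" : "digest mismatch: #{actual}")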
data/apache_log_report.gemspec
CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]

+  spec.add_dependency "apache_log-parser"
   spec.add_dependency "browser"
   spec.add_dependency "sqlite3"
+  spec.add_dependency "terminal-table"
 end
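Version 0.9.1 declares two additional runtime dependencies: apache_log-parser (presumably for parsing the Apache log lines) and terminal-table (plain-text table output). As an illustration of what the second dependency provides, this is standard terminal-table usage, not necessarily the exact call apache_log_report makes:

    # Standard terminal-table usage; shown only to illustrate the new dependency.
    require 'terminal-table'

    table = Terminal::Table.new(
      title:    "Totals",
      headings: ["Metric", "Value"],
      rows:     [["Hits", 1234], ["Unique Visitors", 321], ["TX (Kb)", 5678]]
    )
    puts table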
data/exe/apache_log_report
CHANGED
@@ -1,627 +1,32 @@
 #!/usr/bin/env ruby

-require 'apache_log_report'
-require 'progressbar'
-
-LIMIT = 30
-
-##############################################################################
-# MONKEY PATCH ARRAY AND HASH TO BUILD PIPES WHEN COMPUTING STATS
-
-class Array
-  # counts occurrences of each element in an array and returns a hash { element => count }
-  def uniq_with_count
-    h = Hash.new(0); self.each { |l| h[l] += 1 }; h
-  end
-end
-
-class Hash
-  # sort and limit entries of a hash. Returns an array which can be output
-  # sort by key or value, optionally reverse, optionally return a limited number of items
-  # {2014 => 10, 2015 => 20, 2010 => 30} => [[2014, 10], ...]
-  def prepare_for_output options = {}
-    sorted_a = []
-
-    if options[:sort] == :key
-      sorted_a = self.sort_by { |k, v| k }
-    elsif options[:sort]
-      sorted_a = self.sort_by { |k, v| v[options[:sort]] }
-    else
-      sorted_a = self.to_a
-    end
-
-    sorted_a = sorted_a.reverse if options[:reverse]
-    sorted_a = sorted_a[0, options[:limit]] if options[:limit]
-
-    sorted_a
-  end
-end
-
-##############################################################################
-# EMITTERS ARE USED TO OUTPUT TO DIFFERENT FORMATS
-
-class OrgTableEmitter
-  # print either of:
-  # [[ row_title, value, value, value, ...], ...]
-  # [[ row_title, {key: value, key: value, key: value}, ...], ...]
-  def self.emit table, options = {}
-    if table.size == 0
-      puts "No values to show"
-      return
-    end
-
-    puts "\n\n#{options[:title]}\n\n" if options[:title]
-
-    puts "#+NAME: #{options[:name]}" if options[:name]
-
-    # if table is in the form of a hash, transform it into an array
-    if table[0][1].class == Hash then
-      table = table.map { |x| [ x[0] ] + x[1].values }
-    end
-
-    # get longest row title and then longest column or use 50 and 10 as defaults
-    firstcol_length = options[:compact] ? 2 + table.map { |x| x[0].to_s.size }.max : 50
-    othercol_length = options[:compact] ? 2 + table.map { |x| x[1..-1].map { |x| x.to_s.size }.max }.max : 10
-    # take into account also the headers lengths'
-    headers_length = options[:headers] ? 2 + options[:headers][1..-1].map { |x| x.to_s.size }.max : 0
-    othercol_length = [othercol_length, headers_length].max
-
-    # build the formatting string
-    col_sizes = [ firstcol_length ] + [othercol_length] * table[0][1..-1].size
-    col_classes = table[0].map { |x| x.class.to_s }
-    col_formats = col_classes.each_with_index.map { |x, i| format_for(x, col_sizes[i]) }.join("") + "|"
-
-    # print header if asked to do so
-    if options[:headers]
-      puts (col_sizes.map { |x| "| %-#{x}s " }.join("") % options[:headers]) + "|"
-      puts (col_sizes.map { |x| "|#{"-" * (2 + x)}" }.join("")) + "|"
-    end
-
-    # print each table row
-    table.each do |row|
-      puts col_formats % row
-    end
-
-    puts "\n"
-  end
-
-  private
-
-  def self.format_for klass, size
-    case klass
-    when "String"
-      "| %-#{size}s "
-    when "Integer"
-      "| %#{size}d "
-    when "Double"
-      "| %#{size}.2f "
-    when "Float"
-      "| %#{size}.2f "
-    else
-      "| %#{size}s "
-    end
-  end
-
-end
-
-
-##############################################################################
-# PARSE OPTIONS
-
-
-##############################################################################
-# PARSE COMMAND LINE, SETS OPTIONS, PERFORM BASIC CHECKS, AND RUN
-
-def days log
-  (log.last[:date_time].to_date - log.first[:date_time].to_date).to_i
-end
-
-options = OptionParser.parse ARGV
-
-limit = options[:limit]
-from_date = options[:from_date]
-to_date = options[:to_date]
-
-ignore_crawlers = options[:ignore_crawlers]
-only_crawlers = options[:only_crawlers]
-distinguish_crawlers = options[:distinguish_crawlers]
-
-no_selfpoll = options[:no_selfpoll]
-prefix = options[:prefix] ? "#{options[:prefix]}" : ""
-suffix = options[:suffix] ? "#{options[:suffix]}" : ""
-log_file = ARGV[0]
-
-
-if log_file and not File.exist? log_file
-  puts "Error: file #{log_file} does not exist"
-  exit 1
-end
-
-##############################################################################
-# COMPUTE THE STATISTICS
-
-started_at = Time.now
-log = LogParser.parse log_file, options
-days = days(log)
-
-log_no_selfpolls = log.select { |x| x[:ip] != "::1" }
-
-log_input = no_selfpoll ? log : log_no_selfpolls
-
-# get only requested entries
-log_filtered = log_input.select { |x|
-  (not from_date or from_date <= x[:date_time]) and
-  (not to_date or x[:date_time] <= to_date) and
-  (not ignore_crawlers or x[:bot] == false) and
-  (not only_crawlers or x[:bot] == true)
-}
-days_filtered = days(log_filtered)
-
-printf <<EOS
-#+TITLE: Apache Log Analysis: #{log_file}
-#+DATE: <#{Date.today}>
-#+STARTUP: showall
-#+OPTIONS: ^:{}
-#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
-#+OPTIONS: html-style:nil
-EOS
-
-puts "\n\n* Summary"
-
-OrgTableEmitter.emit [ ["Input file", log_file || "stdin"],
-                       ["Ignore crawlers", options[:ignore_crawlers] == true],
-                       ["Only crawlers", options[:only_crawlers] == true],
-                       ["Distinguish crawlers", options[:distinguish_crawlers] == true],
-                       ["No selfpoll", no_selfpoll],
-                       ["Filter by date", (options[:from_date] != nil or options[:to_date] != nil)],
-                       ["Prefix", prefix],
-                       ["Suffix", suffix]
-                     ],
-                     title: "** Log Analysis Request Summary",
-                     compact: true
-
-
-OrgTableEmitter.emit [ ["First request", log.first[:date_time]],
-                       ["Last request", log.last[:date_time]],
-                       ["Days", days.to_s]
-                     ],
-                     title: "** Logging Period",
-                     compact: true
-
-
-OrgTableEmitter.emit [ ["First day (filtered)", log_filtered.first[:date_time]],
-                       ["Last day (filtered)", log_filtered.last[:date_time]],
-                       ["Days (filtered)", days_filtered.to_s]
-                     ],
-                     title: "** Portion Analyzed",
-                     compact: true
-
-
-OrgTableEmitter.emit [ ["Log size", log.size],
-                       ["Self poll entries", log.size - log_no_selfpolls.size],
-                       ["Entries Parsed", log_input.size],
-                       ["Entries after filtering", log_filtered.size],
-                     ],
-                     title: "** Filtering",
-                     compact: true,
-                     name: "size"
+require 'apache_log_report.rb'

 #
-#
-# take an array of hashes, group by a lambda function, count hits, visitors, and tx data
+# Parse Command Line Arguments
 #
-def group_and_count log, key
-  matches = log.group_by { |x| key.call(x) }
-
-  h = {}
-
-  # each key in matches is an array of hashes (all log entries matching key)
-  matches.each do |k, v|
-    h[k] = {
-      hits: v.size,
-      visitors: v.uniq { |x| [ x[:date_time].to_date, x[:ip], x[:user_agent_string] ] }.count,
-      tx: v.map { |x| x[:size] }.inject(&:+) / 1024,
-    }
-  end
-
-  h
-end
-
-# like the previous function, but the count function is responsible of returning a hash with the desired data
-# the previous function is: group_and_generic_count log, key, lamnda { |v| { hits: v.size, visitors: v.uniq ..., tx: v.map ... } }
-def group_and_generic_count log, key, count
-  matches = log.group_by { |x| key.call(x) }
-
-  h = {}
-
-  # each key in matches is an array of hashes (all log entries matching key)
-  matches.each do |k, v|
-    h[k] = count.call(v)
-  end
-
-  h
-end
-

-
-
-
-    h[c] = hash.values.map { |x| x[c] }.inject(&:+)
-  end
-  h
-end
-
-##############################################################################
-
-table = group_and_count log_filtered, lambda { |x| x[:date_time].to_date }
-totals = totals table
-
-OrgTableEmitter.emit [ ["Hits", totals[:hits]],
-                       ["Unique Visitors", totals[:visitors]],
-                       ["Hits / Unique Visitor", totals[:hits] / totals[:visitors].to_f],
-                       ["TX (Kb)", totals[:tx] ],
-                       ["TX (Kb) / Unique Visitor", totals[:tx] / totals[:visitors]],
-                     ],
-                     title: "* Totals",
-                     name: "totals",
-                     compact: true
-
-if (distinguish_crawlers)
-  bot_table = group_and_count log_filtered.select { |x| x[:bot] }, lambda { |x| x[:date_time].to_date }
-  bot_totals = totals bot_table
-
-  OrgTableEmitter.emit [ ["Hits", bot_totals[:hits]],
-                         ["Unique Visitors", bot_totals[:visitors]],
-                         ["Hits / Unique Visitor", bot_totals[:hits] / bot_totals[:visitors].to_f],
-                         ["TX (Kb)", bot_totals[:tx] ],
-                         ["TX (Kb) / Unique Visitor", bot_totals[:tx] / bot_totals[:visitors]],
-                       ],
-                       title: "** Bot Totals",
-                       name: "bot_totals",
-                       compact: true
-
-  vis_table = group_and_count log_filtered.select { |x| not x[:bot] }, lambda { |x| x[:date_time].to_date }
-  vis_totals = totals vis_table
-
-  OrgTableEmitter.emit [ ["Hits", vis_totals[:hits]],
-                         ["Unique Visitors", vis_totals[:visitors]],
-                         ["Hits / Unique Visitor", vis_totals[:hits] / vis_totals[:visitors].to_f],
-                         ["TX (Kb)", vis_totals[:tx] ],
-                         ["TX (Kb) / Unique Visitor", vis_totals[:tx] / vis_totals[:visitors]],
-                       ],
-                       title: "** Visitors Totals",
-                       name: "vis_totals",
-                       compact: true
-
-end
-
-enriched_table = Hash.new
-table.map { |k, v| enriched_table[k] = v.merge({ dow: k.wday, month: k.month }) }
-
-OrgTableEmitter.emit enriched_table.prepare_for_output(sort: :key),
-                     title: "* Daily Distribution",
-                     compact: true,
-                     headers: ["Day", "Hits", "Visitors", "Size", "Wday", "Month"],
-                     name: "daily_distribution"
-
-puts <<EOS
-#+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports both :file #{prefix}daily#{suffix}.svg
-reset
-set grid ytics linestyle 0
-set grid xtics linestyle 0
-set terminal svg size 1200,800 fname 'Arial'
-
-set xdata time
-set timefmt "%Y-%m-%d"
-set format x "%a, %b %d"
-set xtics rotate by 60 right
-
-set title "Hits and Visitors"
-set xlabel "Date"
-set ylabel "Hits"
-set ylabel2 "Visits"
-
-set style fill transparent solid 0.2 noborder
-
-plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
-     data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
-     data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
-     data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
-     data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
-     data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
-#+END_SRC
-EOS
-
-#
-# distribution per hour
-#
-
-table = group_and_count log_filtered, lambda { |x| x[:date_time].hour }
-table_processed = table.prepare_for_output(sort: :key).map { |x|
-  ["%02d" % x[0] + ":00"] +
-  [ x[1].merge(hits_per_day: x[1][:hits] / days_filtered,
-               visitors_per_day: x[1][:visitors] / days_filtered,
-               tx_per_day: x[1][:tx] / days_filtered) ] }
-
-OrgTableEmitter.emit table_processed,
-                     title: "* Time Distribution",
-                     compact: true,
-                     headers: ["Time", "Hits", "Visitors", "Size (Kb)", "Hits/Day", "Visit/Day", "Size (Kb)/Day"],
-                     name: "time_distribution"
-
-puts <<EOS
-#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time#{suffix}.svg
-reset
-set terminal svg size 1200,800 fname 'Arial' fsize 10
-
-set grid ytics linestyle 0
-
-set title "Hits and Visitors"
-set xlabel "Date"
-set ylabel "Hits and Visits"
-
-set style fill solid 0.25
-set boxwidth 0.6
-
-set style data histograms
-set style histogram clustered gap 1
-
-plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
-     data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
-     data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
-     data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
-#+END_SRC
-
-EOS
-
-puts <<EOS
-#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time-traffic#{suffix}.svg
-reset
-set terminal svg size 1200,800 fname 'Arial' fsize 10
-
-set grid ytics linestyle 0
-
-set title "Traffic"
-set xlabel "Date"
-set ylabel "Traffic"
-
-set style fill solid 0.50
-set boxwidth 0.6
-
-set style data histograms
-set style histogram clustered gap 1
-
-plot data using 2:xtic(1) lc rgb "#00AA00" title "Traffic", \\
-     data using ($0):($2 + 10):2 with labels title "" textcolor rgb("#00AA00")
-#+END_SRC
-
-EOS
-
-#
-# most requested pages
-#
-
-log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] == ".html" }
-table = group_and_count log_success, lambda { |x| x[:uri] }
-
-OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
-                     title: "* Most Requested Pages",
-                     compact: true,
-                     headers: ["Page", "Hits", "Visitors", "Size"],
-                     name: "pages"
-
-puts "Total of #{table.size} entries."
-
-#
-# most requested URIs
-#
-
-log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] != ".html" }
-table = group_and_count log_success, lambda { |x| x[:uri] }
-
-OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
-                     title: "* Most Requested URIs",
-                     compact: true,
-                     headers: ["URI", "Hits", "Visitors", "Size"],
-                     name: "pages"
-
-puts "Total of #{table.size} entries."
-
-#
-# 404s (Pages)
-#
-
-table = log_filtered.select { |x| x[:status] == "404" and x[:type] == ".html" }.map { |x| x[:uri] }.uniq_with_count
-
-OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
-                     title: "* HTML 404s",
-                     compact: true,
-                     headers: ["Page", "Misses"],
-                     name: "page_miss"
-
-puts "Total of #{table.size} entries."
-
-#
-# 404s URIs
-#
-
-table = log_filtered.select { |x| x[:status] == "404" and x[:type] != ".html" }.map { |x| x[:uri] }.uniq_with_count
-
-OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
-                     title: "* HTML 404s",
-                     compact: true,
-                     headers: ["Page", "Misses"],
-                     name: "page_miss"
-
-puts "Total of #{table.size} entries."
-
-#
-# Attacks
-#
-def reasonable_response_type ext
-  [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].include? ext.downcase
-end
-
-table = log_filtered.select { |x| x[:status] != "200" and not reasonable_response_type(x[:type]) }.map { |x| x[:uri] }.uniq_with_count
-
-OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
-                     title: "* Possible Attacks",
-                     compact: true,
-                     headers: ["Request", "Count"],
-                     name: "attacks"
-
-puts "Total of #{table.size} entries."
-
-#
-# IPs
-#
-
-table = group_and_count log_success, lambda { |x| x[:ip] }
-
-OrgTableEmitter.emit table.prepare_for_output(sort: :key, reverse: true, limit: limit),
-                     title: "* IPs",
-                     compact: true,
-                     headers: ["IP", "Hits", "Visitors", "Size"],
-                     name: "ips"
-
-puts "Total of #{table.size} entries."
-
-#
-# Statuses, Browsers and Platforms
-#
-
-[:status, :browser, :platform].each do |what|
-
-  result = log_filtered.map { |x| x[what] }.uniq_with_count
-
-  OrgTableEmitter.emit result.prepare_for_output(sort: :key),
-                       title: "* #{what.to_s.capitalize}",
-                       compact: true,
-                       headers: [what.to_s.capitalize, "Hits"],
-                       name: what.to_s
-
-  puts <<EOS
-#+BEGIN_SRC gnuplot :var data = #{what.to_s} :results output :exports both :file #{prefix}#{what.to_s}#{suffix}.svg
-reset
-set grid ytics linestyle 0
-set terminal svg size 1200,800 fname 'Arial' fsize 10
-
-set style fill solid 0.25
-set boxwidth 0.6
-
-plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
-     data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
-#+END_SRC
-EOS
+@command = ARGV.join(" ")
+@options = ApacheLogReport.options_parse ARGV
+@log_file = ARGV[0]

+if @log_file and not File.exist? @log_file
+  puts "Error: file #{log_file} does not exist"
+  exit 1
 end

 #
-#
+# Parse Log
 #
-result = group_and_generic_count log_filtered,
-         lambda { |x| x[:date_time].to_date },
-         lambda { |x| h = Hash.new;
-                      h["4xx"] = x.select { |y| y[:status][0] == "4" }.count;
-                      h["3xx"] = x.select { |y| y[:status][0] == "3" }.count;
-                      h["2xx"] = x.select { |y| y[:status][0] == "2" }.count;
-                      h }
-
-OrgTableEmitter.emit result.prepare_for_output(sort: :key),
-                     title: "* Daily Status",
-                     compact: true,
-                     headers: ["Day", "4xx", "3xx", "2xx"],
-                     name: "daily_statuses"
-
-puts <<EOS
-#+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports both :file #{prefix}daily-statuses#{suffix}.svg
-reset
-set terminal svg size 1200,800 fname 'Arial' fsize 10

-
+@started_at = Time.now
+@db = ApacheLogReport.parse @log_file
+ApacheLogReport.analyze_data @db, @options

-
-
-set ylabel "Number of Hits"
-set xtics rotate by 60 right
-
-set style fill solid 0.25
-set boxwidth 0.6
-
-set style data histograms
-set style histogram clustered gap 1
-
-plot data using 2:xtic(1) lc rgb "#CC0000" title "4xx", \\
-     data using 3 lc rgb "#0000CC" title "3xx", \\
-     data using 4 lc rgb "#00AA00" title "2xx", \\
-     data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#CC0000"), \\
-     data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
-     data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#00AA00")
-#+END_SRC
-
-EOS
+@ended_at = Time.now
+@duration = @ended_at - @started_at

 #
-#
+# Emit Output
 #
-
-    URI(x[:referer]).host
-  rescue Exception
-    ""
-  end }
-good_result = result.reject! { |k| k == nil }
-
-OrgTableEmitter.emit good_result.prepare_for_output(sort: :key),
-                     title: "* Referer",
-                     compact: true,
-                     headers: ["Referer", "Hits", "Visitors", "Size"],
-                     name: "referers"
-
-puts <<EOS
-#+BEGIN_SRC gnuplot :var data = referers :results output :exports both :file #{prefix}referers#{suffix}.svg
-reset
-set terminal svg size 1200,800 fname 'Arial' fsize 10
-
-set grid ytics linestyle 0
-set grid xtics linestyle 0
-
-set title "Referers"
-set xlabel "Date"
-set xtics rotate by 60 right
-set ylabel "Hits and Visits"
-
-set style fill solid 0.45
-set boxwidth 0.7
-
-set style data histograms
-set style histogram clustered gap 1
-
-plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
-     data using 3 lc rgb "#0AAAA0" title "Visits", \\
-     data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
-     data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
-#+END_SRC
-EOS
-
-puts <<EOS
-* Local Variables :noexport:
-# Local Variables:
-# org-confirm-babel-evaluate: nil
-# org-display-inline-images: t
-# end:
-EOS
-
-ended_at = Time.now
-duration = ended_at - started_at
-
-puts <<EOS
-** Performance
-
-| Analysis started at | #{started_at.to_s} |
-| Analysis ended at | #{ended_at.to_s} |
-| Duration (sec) | #{"%.3d" % duration } |
-| Duration (min) | #{"%.3d" % (duration / 60 )} |
-| Log size | #{log.size} |
-| Entries Parsed | #{log_input.size} |
-| Lines/sec | #{log_input.size / duration} |
-EOS
+puts ApacheLogReport.emit @options, @command, @log_file, @started_at, @ended_at, @duration
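The rewritten executable above delegates all of the work to the ApacheLogReport module. For reuse outside the bundled CLI, the same pipeline can be driven from another script; the sketch below uses only the module methods visible in the added lines (options_parse, parse, analyze_data, emit), and treats everything else (argument values, available flags) as assumptions.

    #!/usr/bin/env ruby
    # Sketch: driving the 0.9.1 pipeline from a separate script. Method names are
    # taken from the new exe above; the "--limit" flag and argument semantics are
    # assumptions, not confirmed by this diff.
    require 'apache_log_report'

    argv     = ["access.log", "--limit", "30"]   # hypothetical arguments
    options  = ApacheLogReport.options_parse argv
    log_file = argv[0]

    abort "Error: file #{log_file} does not exist" unless File.exist?(log_file)

    started_at = Time.now
    db = ApacheLogReport.parse log_file          # parsing yields the handle passed on below
    ApacheLogReport.analyze_data db, options
    ended_at = Time.now

    puts ApacheLogReport.emit options, argv.join(" "), log_file, started_at, ended_at, ended_at - started_at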