apache_log_report 0.9.0 → 0.9.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.org +7 -0
- data/apache_log_report.gemspec +2 -0
- data/exe/apache_log_report +16 -612
- data/lib/apache_log_report.rb +509 -3
- data/lib/apache_log_report/version.rb +1 -1
- metadata +30 -5
- data/lib/apache_log_report/log_parser_hash.rb +0 -49
- data/lib/apache_log_report/log_parser_sqlite3.rb +0 -99
- data/lib/apache_log_report/option_parser.rb +0 -63
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d75b1094318f7610f0ef9f0cd34ef7fe5d2ba38effb65514d1ca2dddb90962c
|
4
|
+
data.tar.gz: c0c16a44463d9a808dd6919aaf8aa1c1dfbf16bd589bdef1c8ab6e839dba8c05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af73bf2dd1eec2b9985cc0fa0cf72c7ae1ca0afa91e14eb11e5b6e187d52a81880fe4b3051eba23ad3d5b207d977e0a9132e0644ae51853b8e6d76ff41b7ee1d
|
7
|
+
data.tar.gz: c968b4971563a5164e790392a34d7cda4787a21385f283d6f39c27ae14a4b10bb99c735d7084f562af169337f77e17e22b4a78d7e9cd9d84eca411be0d59b491
|
data/README.org
CHANGED
@@ -12,6 +12,13 @@
|
|
12
12
|
|
13
13
|
See the [[file:CHANGELOG.org][CHANGELOG]] file.
|
14
14
|
|
15
|
+
* Todo
|
16
|
+
|
17
|
+
** TODO Version information from command line and in reports
|
18
|
+
** TODO Refactor code from one giant class to more manageable chunks
|
19
|
+
** TODO Move performance stats var to class (to isolate vars)
|
20
|
+
** TODO Check total number of days (which is not working, now)
|
21
|
+
|
15
22
|
* Compatibility
|
16
23
|
|
17
24
|
|
data/apache_log_report.gemspec
CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
28
|
spec.require_paths = ["lib"]
|
29
29
|
|
30
|
+
spec.add_dependency "apache_log-parser"
|
30
31
|
spec.add_dependency "browser"
|
31
32
|
spec.add_dependency "sqlite3"
|
33
|
+
spec.add_dependency "terminal-table"
|
32
34
|
end
|
data/exe/apache_log_report
CHANGED
@@ -1,627 +1,31 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'apache_log_report'
|
4
|
-
require 'progressbar'
|
5
|
-
|
6
|
-
LIMIT = 30
|
7
|
-
|
8
|
-
##############################################################################
|
9
|
-
# MONKEY PATCH ARRAY AND HASH TO BUILD PIPES WHEN COMPUTING STATS
|
10
|
-
|
11
|
-
class Array
|
12
|
-
# counts occurrences of each element in an array and returns a hash { element => count }
|
13
|
-
def uniq_with_count
|
14
|
-
h = Hash.new(0); self.each { |l| h[l] += 1 }; h
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
class Hash
|
19
|
-
# sort and limit entries of a hash. Returns an array which can be output
|
20
|
-
# sort by key or value, optionally reverse, optionally return a limited number of items
|
21
|
-
# {2014 => 10, 2015 => 20, 2010 => 30} => [[2014, 10], ...]
|
22
|
-
def prepare_for_output options = {}
|
23
|
-
sorted_a = []
|
24
|
-
|
25
|
-
if options[:sort] == :key
|
26
|
-
sorted_a = self.sort_by { |k, v| k }
|
27
|
-
elsif options[:sort]
|
28
|
-
sorted_a = self.sort_by { |k, v| v[options[:sort]] }
|
29
|
-
else
|
30
|
-
sorted_a = self.to_a
|
31
|
-
end
|
32
|
-
|
33
|
-
sorted_a = sorted_a.reverse if options[:reverse]
|
34
|
-
sorted_a = sorted_a[0, options[:limit]] if options[:limit]
|
35
|
-
|
36
|
-
sorted_a
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
##############################################################################
|
41
|
-
# EMITTERS ARE USED TO OUTPUT TO DIFFERENT FORMATS
|
42
|
-
|
43
|
-
class OrgTableEmitter
|
44
|
-
# print either of:
|
45
|
-
# [[ row_title, value, value, value, ...], ...]
|
46
|
-
# [[ row_title, {key: value, key: value, key: value}, ...], ...]
|
47
|
-
def self.emit table, options = {}
|
48
|
-
if table.size == 0
|
49
|
-
puts "No values to show"
|
50
|
-
return
|
51
|
-
end
|
52
|
-
|
53
|
-
puts "\n\n#{options[:title]}\n\n" if options[:title]
|
54
|
-
|
55
|
-
puts "#+NAME: #{options[:name]}" if options[:name]
|
56
|
-
|
57
|
-
# if table is in the form of a hash, transform it into an array
|
58
|
-
if table[0][1].class == Hash then
|
59
|
-
table = table.map { |x| [ x[0] ] + x[1].values }
|
60
|
-
end
|
61
|
-
|
62
|
-
# get longest row title and then longest column or use 50 and 10 as defaults
|
63
|
-
firstcol_length = options[:compact] ? 2 + table.map { |x| x[0].to_s.size }.max : 50
|
64
|
-
othercol_length = options[:compact] ? 2 + table.map { |x| x[1..-1].map { |x| x.to_s.size }.max }.max : 10
|
65
|
-
# take into account also the headers lengths'
|
66
|
-
headers_length = options[:headers] ? 2 + options[:headers][1..-1].map { |x| x.to_s.size }.max : 0
|
67
|
-
othercol_length = [othercol_length, headers_length].max
|
68
|
-
|
69
|
-
# build the formatting string
|
70
|
-
col_sizes = [ firstcol_length ] + [othercol_length] * table[0][1..-1].size
|
71
|
-
col_classes = table[0].map { |x| x.class.to_s }
|
72
|
-
col_formats = col_classes.each_with_index.map { |x, i| format_for(x, col_sizes[i]) }.join("") + "|"
|
73
|
-
|
74
|
-
# print header if asked to do so
|
75
|
-
if options[:headers]
|
76
|
-
puts (col_sizes.map { |x| "| %-#{x}s " }.join("") % options[:headers]) + "|"
|
77
|
-
puts (col_sizes.map { |x| "|#{"-" * (2 + x)}" }.join("")) + "|"
|
78
|
-
end
|
79
|
-
|
80
|
-
# print each table row
|
81
|
-
table.each do |row|
|
82
|
-
puts col_formats % row
|
83
|
-
end
|
84
|
-
|
85
|
-
puts "\n"
|
86
|
-
end
|
87
|
-
|
88
|
-
private
|
89
|
-
|
90
|
-
def self.format_for klass, size
|
91
|
-
case klass
|
92
|
-
when "String"
|
93
|
-
"| %-#{size}s "
|
94
|
-
when "Integer"
|
95
|
-
"| %#{size}d "
|
96
|
-
when "Double"
|
97
|
-
"| %#{size}.2f "
|
98
|
-
when "Float"
|
99
|
-
"| %#{size}.2f "
|
100
|
-
else
|
101
|
-
"| %#{size}s "
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
106
|
-
|
107
|
-
|
108
|
-
##############################################################################
|
109
|
-
# PARSE OPTIONS
|
110
|
-
|
111
|
-
|
112
|
-
##############################################################################
|
113
|
-
# PARSE COMMAND LINE, SETS OPTIONS, PERFORM BASIC CHECKS, AND RUN
|
114
|
-
|
115
|
-
def days log
|
116
|
-
(log.last[:date_time].to_date - log.first[:date_time].to_date).to_i
|
117
|
-
end
|
118
|
-
|
119
|
-
options = OptionParser.parse ARGV
|
120
|
-
|
121
|
-
limit = options[:limit]
|
122
|
-
from_date = options[:from_date]
|
123
|
-
to_date = options[:to_date]
|
124
|
-
|
125
|
-
ignore_crawlers = options[:ignore_crawlers]
|
126
|
-
only_crawlers = options[:only_crawlers]
|
127
|
-
distinguish_crawlers = options[:distinguish_crawlers]
|
128
|
-
|
129
|
-
no_selfpoll = options[:no_selfpoll]
|
130
|
-
prefix = options[:prefix] ? "#{options[:prefix]}" : ""
|
131
|
-
suffix = options[:suffix] ? "#{options[:suffix]}" : ""
|
132
|
-
log_file = ARGV[0]
|
133
|
-
|
134
|
-
|
135
|
-
if log_file and not File.exist? log_file
|
136
|
-
puts "Error: file #{log_file} does not exist"
|
137
|
-
exit 1
|
138
|
-
end
|
139
|
-
|
140
|
-
##############################################################################
|
141
|
-
# COMPUTE THE STATISTICS
|
142
|
-
|
143
|
-
started_at = Time.now
|
144
|
-
log = LogParser.parse log_file, options
|
145
|
-
days = days(log)
|
146
|
-
|
147
|
-
log_no_selfpolls = log.select { |x| x[:ip] != "::1" }
|
148
|
-
|
149
|
-
log_input = no_selfpoll ? log : log_no_selfpolls
|
150
|
-
|
151
|
-
# get only requested entries
|
152
|
-
log_filtered = log_input.select { |x|
|
153
|
-
(not from_date or from_date <= x[:date_time]) and
|
154
|
-
(not to_date or x[:date_time] <= to_date) and
|
155
|
-
(not ignore_crawlers or x[:bot] == false) and
|
156
|
-
(not only_crawlers or x[:bot] == true)
|
157
|
-
}
|
158
|
-
days_filtered = days(log_filtered)
|
159
|
-
|
160
|
-
printf <<EOS
|
161
|
-
#+TITLE: Apache Log Analysis: #{log_file}
|
162
|
-
#+DATE: <#{Date.today}>
|
163
|
-
#+STARTUP: showall
|
164
|
-
#+OPTIONS: ^:{}
|
165
|
-
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
|
166
|
-
#+OPTIONS: html-style:nil
|
167
|
-
EOS
|
168
|
-
|
169
|
-
puts "\n\n* Summary"
|
170
|
-
|
171
|
-
OrgTableEmitter.emit [ ["Input file", log_file || "stdin"],
|
172
|
-
["Ignore crawlers", options[:ignore_crawlers] == true],
|
173
|
-
["Only crawlers", options[:only_crawlers] == true],
|
174
|
-
["Distinguish crawlers", options[:distinguish_crawlers] == true],
|
175
|
-
["No selfpoll", no_selfpoll],
|
176
|
-
["Filter by date", (options[:from_date] != nil or options[:to_date] != nil)],
|
177
|
-
["Prefix", prefix],
|
178
|
-
["Suffix", suffix]
|
179
|
-
],
|
180
|
-
title: "** Log Analysis Request Summary",
|
181
|
-
compact: true
|
182
|
-
|
183
|
-
|
184
|
-
OrgTableEmitter.emit [ ["First request", log.first[:date_time]],
|
185
|
-
["Last request", log.last[:date_time]],
|
186
|
-
["Days", days.to_s]
|
187
|
-
],
|
188
|
-
title: "** Logging Period",
|
189
|
-
compact: true
|
190
|
-
|
191
|
-
|
192
|
-
OrgTableEmitter.emit [ ["First day (filtered)", log_filtered.first[:date_time]],
|
193
|
-
["Last day (filtered)", log_filtered.last[:date_time]],
|
194
|
-
["Days (filtered)", days_filtered.to_s]
|
195
|
-
],
|
196
|
-
title: "** Portion Analyzed",
|
197
|
-
compact: true
|
198
|
-
|
199
|
-
|
200
|
-
OrgTableEmitter.emit [ ["Log size", log.size],
|
201
|
-
["Self poll entries", log.size - log_no_selfpolls.size],
|
202
|
-
["Entries Parsed", log_input.size],
|
203
|
-
["Entries after filtering", log_filtered.size],
|
204
|
-
],
|
205
|
-
title: "** Filtering",
|
206
|
-
compact: true,
|
207
|
-
name: "size"
|
208
|
-
|
209
|
-
#
|
210
|
-
# hits, unique visitors, and size per day
|
211
|
-
# take an array of hashes, group by a lambda function, count hits, visitors, and tx data
|
212
|
-
#
|
213
|
-
def group_and_count log, key
|
214
|
-
matches = log.group_by { |x| key.call(x) }
|
215
|
-
|
216
|
-
h = {}
|
217
|
-
|
218
|
-
# each key in matches is an array of hashes (all log entries matching key)
|
219
|
-
matches.each do |k, v|
|
220
|
-
h[k] = {
|
221
|
-
hits: v.size,
|
222
|
-
visitors: v.uniq { |x| [ x[:date_time].to_date, x[:ip], x[:user_agent_string] ] }.count,
|
223
|
-
tx: v.map { |x| x[:size] }.inject(&:+) / 1024,
|
224
|
-
}
|
225
|
-
end
|
226
|
-
|
227
|
-
h
|
228
|
-
end
|
229
|
-
|
230
|
-
# like the previous function, but the count function is responsible for returning a hash with the desired data
|
231
|
-
# the previous function is: group_and_generic_count log, key, lambda { |v| { hits: v.size, visitors: v.uniq ..., tx: v.map ... } }
|
232
|
-
def group_and_generic_count log, key, count
|
233
|
-
matches = log.group_by { |x| key.call(x) }
|
234
|
-
|
235
|
-
h = {}
|
236
|
-
|
237
|
-
# each key in matches is an array of hashes (all log entries matching key)
|
238
|
-
matches.each do |k, v|
|
239
|
-
h[k] = count.call(v)
|
240
|
-
end
|
241
|
-
|
242
|
-
h
|
243
|
-
end
|
244
|
-
|
245
|
-
|
246
|
-
def totals hash
|
247
|
-
h = Hash.new
|
248
|
-
[:hits, :visitors, :tx].each do |c|
|
249
|
-
h[c] = hash.values.map { |x| x[c] }.inject(&:+)
|
250
|
-
end
|
251
|
-
h
|
252
|
-
end
|
253
|
-
|
254
|
-
##############################################################################
|
255
|
-
|
256
|
-
table = group_and_count log_filtered, lambda { |x| x[:date_time].to_date }
|
257
|
-
totals = totals table
|
258
|
-
|
259
|
-
OrgTableEmitter.emit [ ["Hits", totals[:hits]],
|
260
|
-
["Unique Visitors", totals[:visitors]],
|
261
|
-
["Hits / Unique Visitor", totals[:hits] / totals[:visitors].to_f],
|
262
|
-
["TX (Kb)", totals[:tx] ],
|
263
|
-
["TX (Kb) / Unique Visitor", totals[:tx] / totals[:visitors]],
|
264
|
-
],
|
265
|
-
title: "* Totals",
|
266
|
-
name: "totals",
|
267
|
-
compact: true
|
268
|
-
|
269
|
-
if (distinguish_crawlers)
|
270
|
-
bot_table = group_and_count log_filtered.select { |x| x[:bot] }, lambda { |x| x[:date_time].to_date }
|
271
|
-
bot_totals = totals bot_table
|
272
|
-
|
273
|
-
OrgTableEmitter.emit [ ["Hits", bot_totals[:hits]],
|
274
|
-
["Unique Visitors", bot_totals[:visitors]],
|
275
|
-
["Hits / Unique Visitor", bot_totals[:hits] / bot_totals[:visitors].to_f],
|
276
|
-
["TX (Kb)", bot_totals[:tx] ],
|
277
|
-
["TX (Kb) / Unique Visitor", bot_totals[:tx] / bot_totals[:visitors]],
|
278
|
-
],
|
279
|
-
title: "** Bot Totals",
|
280
|
-
name: "bot_totals",
|
281
|
-
compact: true
|
282
|
-
|
283
|
-
vis_table = group_and_count log_filtered.select { |x| not x[:bot] }, lambda { |x| x[:date_time].to_date }
|
284
|
-
vis_totals = totals vis_table
|
285
|
-
|
286
|
-
OrgTableEmitter.emit [ ["Hits", vis_totals[:hits]],
|
287
|
-
["Unique Visitors", vis_totals[:visitors]],
|
288
|
-
["Hits / Unique Visitor", vis_totals[:hits] / vis_totals[:visitors].to_f],
|
289
|
-
["TX (Kb)", vis_totals[:tx] ],
|
290
|
-
["TX (Kb) / Unique Visitor", vis_totals[:tx] / vis_totals[:visitors]],
|
291
|
-
],
|
292
|
-
title: "** Visitors Totals",
|
293
|
-
name: "vis_totals",
|
294
|
-
compact: true
|
295
|
-
|
296
|
-
end
|
297
|
-
|
298
|
-
enriched_table = Hash.new
|
299
|
-
table.map { |k, v| enriched_table[k] = v.merge({ dow: k.wday, month: k.month }) }
|
300
|
-
|
301
|
-
OrgTableEmitter.emit enriched_table.prepare_for_output(sort: :key),
|
302
|
-
title: "* Daily Distribution",
|
303
|
-
compact: true,
|
304
|
-
headers: ["Day", "Hits", "Visitors", "Size", "Wday", "Month"],
|
305
|
-
name: "daily_distribution"
|
306
|
-
|
307
|
-
puts <<EOS
|
308
|
-
#+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports both :file #{prefix}daily#{suffix}.svg
|
309
|
-
reset
|
310
|
-
set grid ytics linestyle 0
|
311
|
-
set grid xtics linestyle 0
|
312
|
-
set terminal svg size 1200,800 fname 'Arial'
|
313
|
-
|
314
|
-
set xdata time
|
315
|
-
set timefmt "%Y-%m-%d"
|
316
|
-
set format x "%a, %b %d"
|
317
|
-
set xtics rotate by 60 right
|
318
|
-
|
319
|
-
set title "Hits and Visitors"
|
320
|
-
set xlabel "Date"
|
321
|
-
set ylabel "Hits"
|
322
|
-
set ylabel2 "Visits"
|
323
|
-
|
324
|
-
set style fill transparent solid 0.2 noborder
|
325
|
-
|
326
|
-
plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
|
327
|
-
data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
|
328
|
-
data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
|
329
|
-
data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
|
330
|
-
data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
|
331
|
-
data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
|
332
|
-
#+END_SRC
|
333
|
-
EOS
|
334
|
-
|
335
|
-
#
|
336
|
-
# distribution per hour
|
337
|
-
#
|
338
|
-
|
339
|
-
table = group_and_count log_filtered, lambda { |x| x[:date_time].hour }
|
340
|
-
table_processed = table.prepare_for_output(sort: :key).map { |x|
|
341
|
-
["%02d" % x[0] + ":00"] +
|
342
|
-
[ x[1].merge(hits_per_day: x[1][:hits] / days_filtered,
|
343
|
-
visitors_per_day: x[1][:visitors] / days_filtered,
|
344
|
-
tx_per_day: x[1][:tx] / days_filtered) ] }
|
345
|
-
|
346
|
-
OrgTableEmitter.emit table_processed,
|
347
|
-
title: "* Time Distribution",
|
348
|
-
compact: true,
|
349
|
-
headers: ["Time", "Hits", "Visitors", "Size (Kb)", "Hits/Day", "Visit/Day", "Size (Kb)/Day"],
|
350
|
-
name: "time_distribution"
|
351
|
-
|
352
|
-
puts <<EOS
|
353
|
-
#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time#{suffix}.svg
|
354
|
-
reset
|
355
|
-
set terminal svg size 1200,800 fname 'Arial' fsize 10
|
356
|
-
|
357
|
-
set grid ytics linestyle 0
|
358
|
-
|
359
|
-
set title "Hits and Visitors"
|
360
|
-
set xlabel "Date"
|
361
|
-
set ylabel "Hits and Visits"
|
362
|
-
|
363
|
-
set style fill solid 0.25
|
364
|
-
set boxwidth 0.6
|
365
|
-
|
366
|
-
set style data histograms
|
367
|
-
set style histogram clustered gap 1
|
368
|
-
|
369
|
-
plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
|
370
|
-
data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
|
371
|
-
data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
|
372
|
-
data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
|
373
|
-
#+END_SRC
|
374
|
-
|
375
|
-
EOS
|
376
|
-
|
377
|
-
puts <<EOS
|
378
|
-
#+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time-traffic#{suffix}.svg
|
379
|
-
reset
|
380
|
-
set terminal svg size 1200,800 fname 'Arial' fsize 10
|
381
|
-
|
382
|
-
set grid ytics linestyle 0
|
383
|
-
|
384
|
-
set title "Traffic"
|
385
|
-
set xlabel "Date"
|
386
|
-
set ylabel "Traffic"
|
387
|
-
|
388
|
-
set style fill solid 0.50
|
389
|
-
set boxwidth 0.6
|
390
|
-
|
391
|
-
set style data histograms
|
392
|
-
set style histogram clustered gap 1
|
393
|
-
|
394
|
-
plot data using 2:xtic(1) lc rgb "#00AA00" title "Traffic", \\
|
395
|
-
data using ($0):($2 + 10):2 with labels title "" textcolor rgb("#00AA00")
|
396
|
-
#+END_SRC
|
397
|
-
|
398
|
-
EOS
|
399
|
-
|
400
|
-
#
|
401
|
-
# most requested pages
|
402
|
-
#
|
403
|
-
|
404
|
-
log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] == ".html" }
|
405
|
-
table = group_and_count log_success, lambda { |x| x[:uri] }
|
406
|
-
|
407
|
-
OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
|
408
|
-
title: "* Most Requested Pages",
|
409
|
-
compact: true,
|
410
|
-
headers: ["Page", "Hits", "Visitors", "Size"],
|
411
|
-
name: "pages"
|
412
|
-
|
413
|
-
puts "Total of #{table.size} entries."
|
414
|
-
|
415
|
-
#
|
416
|
-
# most requested URIs
|
417
|
-
#
|
418
|
-
|
419
|
-
log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] != ".html" }
|
420
|
-
table = group_and_count log_success, lambda { |x| x[:uri] }
|
421
|
-
|
422
|
-
OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
|
423
|
-
title: "* Most Requested URIs",
|
424
|
-
compact: true,
|
425
|
-
headers: ["URI", "Hits", "Visitors", "Size"],
|
426
|
-
name: "pages"
|
427
|
-
|
428
|
-
puts "Total of #{table.size} entries."
|
429
|
-
|
430
|
-
#
|
431
|
-
# 404s (Pages)
|
432
|
-
#
|
433
|
-
|
434
|
-
table = log_filtered.select { |x| x[:status] == "404" and x[:type] == ".html" }.map { |x| x[:uri] }.uniq_with_count
|
435
|
-
|
436
|
-
OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
|
437
|
-
title: "* HTML 404s",
|
438
|
-
compact: true,
|
439
|
-
headers: ["Page", "Misses"],
|
440
|
-
name: "page_miss"
|
441
|
-
|
442
|
-
puts "Total of #{table.size} entries."
|
443
|
-
|
444
|
-
#
|
445
|
-
# 404s URIs
|
446
|
-
#
|
447
|
-
|
448
|
-
table = log_filtered.select { |x| x[:status] == "404" and x[:type] != ".html" }.map { |x| x[:uri] }.uniq_with_count
|
449
|
-
|
450
|
-
OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
|
451
|
-
title: "* HTML 404s",
|
452
|
-
compact: true,
|
453
|
-
headers: ["Page", "Misses"],
|
454
|
-
name: "page_miss"
|
455
|
-
|
456
|
-
puts "Total of #{table.size} entries."
|
3
|
+
require 'apache_log_report.rb'
|
457
4
|
|
458
5
|
#
|
459
|
-
#
|
6
|
+
# Parse Command Line Arguments
|
460
7
|
#
|
461
|
-
def reasonable_response_type ext
|
462
|
-
[ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].include? ext.downcase
|
463
|
-
end
|
464
|
-
|
465
|
-
table = log_filtered.select { |x| x[:status] != "200" and not reasonable_response_type(x[:type]) }.map { |x| x[:uri] }.uniq_with_count
|
466
|
-
|
467
|
-
OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
|
468
|
-
title: "* Possible Attacks",
|
469
|
-
compact: true,
|
470
|
-
headers: ["Request", "Count"],
|
471
|
-
name: "attacks"
|
472
|
-
|
473
|
-
puts "Total of #{table.size} entries."
|
474
|
-
|
475
|
-
#
|
476
|
-
# IPs
|
477
|
-
#
|
478
|
-
|
479
|
-
table = group_and_count log_success, lambda { |x| x[:ip] }
|
480
|
-
|
481
|
-
OrgTableEmitter.emit table.prepare_for_output(sort: :key, reverse: true, limit: limit),
|
482
|
-
title: "* IPs",
|
483
|
-
compact: true,
|
484
|
-
headers: ["IP", "Hits", "Visitors", "Size"],
|
485
|
-
name: "ips"
|
486
|
-
|
487
|
-
puts "Total of #{table.size} entries."
|
488
|
-
|
489
|
-
#
|
490
|
-
# Statuses, Browsers and Platforms
|
491
|
-
#
|
492
|
-
|
493
|
-
[:status, :browser, :platform].each do |what|
|
494
|
-
|
495
|
-
result = log_filtered.map { |x| x[what] }.uniq_with_count
|
496
|
-
|
497
|
-
OrgTableEmitter.emit result.prepare_for_output(sort: :key),
|
498
|
-
title: "* #{what.to_s.capitalize}",
|
499
|
-
compact: true,
|
500
|
-
headers: [what.to_s.capitalize, "Hits"],
|
501
|
-
name: what.to_s
|
502
|
-
|
503
|
-
puts <<EOS
|
504
|
-
#+BEGIN_SRC gnuplot :var data = #{what.to_s} :results output :exports both :file #{prefix}#{what.to_s}#{suffix}.svg
|
505
|
-
reset
|
506
|
-
set grid ytics linestyle 0
|
507
|
-
set terminal svg size 1200,800 fname 'Arial' fsize 10
|
508
|
-
|
509
|
-
set style fill solid 0.25
|
510
|
-
set boxwidth 0.6
|
511
8
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
EOS
|
9
|
+
@command = ARGV.join(" ")
|
10
|
+
@options = ApacheLogReport.options_parse ARGV
|
11
|
+
@log_file = ARGV[0]
|
516
12
|
|
13
|
+
if @log_file and not File.exist? @log_file
|
14
|
+
# Report the missing path using @log_file (the variable the script actually sets).
puts "Error: file #{@log_file} does not exist"
|
15
|
+
exit 1
|
517
16
|
end
|
518
17
|
|
519
18
|
#
|
520
|
-
#
|
19
|
+
# Parse Log and Track Statistics
|
521
20
|
#
|
522
|
-
result = group_and_generic_count log_filtered,
|
523
|
-
lambda { |x| x[:date_time].to_date },
|
524
|
-
lambda { |x| h = Hash.new;
|
525
|
-
h["4xx"] = x.select { |y| y[:status][0] == "4" }.count;
|
526
|
-
h["3xx"] = x.select { |y| y[:status][0] == "3" }.count;
|
527
|
-
h["2xx"] = x.select { |y| y[:status][0] == "2" }.count;
|
528
|
-
h }
|
529
|
-
|
530
|
-
OrgTableEmitter.emit result.prepare_for_output(sort: :key),
|
531
|
-
title: "* Daily Status",
|
532
|
-
compact: true,
|
533
|
-
headers: ["Day", "4xx", "3xx", "2xx"],
|
534
|
-
name: "daily_statuses"
|
535
|
-
|
536
|
-
puts <<EOS
|
537
|
-
#+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports both :file #{prefix}daily-statuses#{suffix}.svg
|
538
|
-
reset
|
539
|
-
set terminal svg size 1200,800 fname 'Arial' fsize 10
|
540
|
-
|
541
|
-
set grid ytics linestyle 0
|
542
21
|
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
set style fill solid 0.25
|
549
|
-
set boxwidth 0.6
|
550
|
-
|
551
|
-
set style data histograms
|
552
|
-
set style histogram clustered gap 1
|
553
|
-
|
554
|
-
plot data using 2:xtic(1) lc rgb "#CC0000" title "4xx", \\
|
555
|
-
data using 3 lc rgb "#0000CC" title "3xx", \\
|
556
|
-
data using 4 lc rgb "#00AA00" title "2xx", \\
|
557
|
-
data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#CC0000"), \\
|
558
|
-
data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
|
559
|
-
data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#00AA00")
|
560
|
-
#+END_SRC
|
561
|
-
|
562
|
-
EOS
|
22
|
+
@started_at = Time.now
|
23
|
+
@db = ApacheLogReport.parse @log_file
|
24
|
+
ApacheLogReport.analyze_data @db, @options
|
25
|
+
@ended_at = Time.now
|
26
|
+
@duration = @ended_at - @started_at
|
563
27
|
|
564
28
|
#
|
565
|
-
#
|
29
|
+
# Emit Output
|
566
30
|
#
|
567
|
-
|
568
|
-
URI(x[:referer]).host
|
569
|
-
rescue Exception
|
570
|
-
""
|
571
|
-
end }
|
572
|
-
good_result = result.reject! { |k| k == nil }
|
573
|
-
|
574
|
-
OrgTableEmitter.emit good_result.prepare_for_output(sort: :key),
|
575
|
-
title: "* Referer",
|
576
|
-
compact: true,
|
577
|
-
headers: ["Referer", "Hits", "Visitors", "Size"],
|
578
|
-
name: "referers"
|
579
|
-
|
580
|
-
puts <<EOS
|
581
|
-
#+BEGIN_SRC gnuplot :var data = referers :results output :exports both :file #{prefix}referers#{suffix}.svg
|
582
|
-
reset
|
583
|
-
set terminal svg size 1200,800 fname 'Arial' fsize 10
|
584
|
-
|
585
|
-
set grid ytics linestyle 0
|
586
|
-
set grid xtics linestyle 0
|
587
|
-
|
588
|
-
set title "Referers"
|
589
|
-
set xlabel "Date"
|
590
|
-
set xtics rotate by 60 right
|
591
|
-
set ylabel "Hits and Visits"
|
592
|
-
|
593
|
-
set style fill solid 0.45
|
594
|
-
set boxwidth 0.7
|
595
|
-
|
596
|
-
set style data histograms
|
597
|
-
set style histogram clustered gap 1
|
598
|
-
|
599
|
-
plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
|
600
|
-
data using 3 lc rgb "#0AAAA0" title "Visits", \\
|
601
|
-
data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
|
602
|
-
data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
|
603
|
-
#+END_SRC
|
604
|
-
EOS
|
605
|
-
|
606
|
-
puts <<EOS
|
607
|
-
* Local Variables :noexport:
|
608
|
-
# Local Variables:
|
609
|
-
# org-confirm-babel-evaluate: nil
|
610
|
-
# org-display-inline-images: t
|
611
|
-
# end:
|
612
|
-
EOS
|
613
|
-
|
614
|
-
ended_at = Time.now
|
615
|
-
duration = ended_at - started_at
|
616
|
-
|
617
|
-
puts <<EOS
|
618
|
-
** Performance
|
619
|
-
|
620
|
-
| Analysis started at | #{started_at.to_s} |
|
621
|
-
| Analysis ended at | #{ended_at.to_s} |
|
622
|
-
| Duration (sec) | #{"%.3d" % duration } |
|
623
|
-
| Duration (min) | #{"%.3d" % (duration / 60 )} |
|
624
|
-
| Log size | #{log.size} |
|
625
|
-
| Entries Parsed | #{log_input.size} |
|
626
|
-
| Lines/sec | #{log_input.size / duration} |
|
627
|
-
EOS
|
31
|
+
puts ApacheLogReport.emit @options, @command, @log_file, @started_at, @ended_at, @duration
|