apache_log_report 0.9.0

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 911c11b89b21140783332a66419e442ed366debad950a430af6265a5cc800129
4
+ data.tar.gz: 0f4d3bf245a2436412d7419d724c0c996d7ea0c6057a847f24cde376e6cf13c5
5
+ SHA512:
6
+ metadata.gz: eed37e0f7c2098e2ca6192760ca5c9722ada8c0a5259d3c52fd9ae382b32a0d39f3da3e37bbcc9ab2446d92e35c3b538c1037ac48a4364f8d05ec2c58ab54fac
7
+ data.tar.gz: 563e8a68028d905bd8389ceefff7ed02cdb82715426fa1e2d5b47feeb471d75034e59b948a04899801e6d7b708f0dca7ff385111d163aef273b91f5c8e3bf2f4
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ *~
data/CHANGELOG.org ADDED
@@ -0,0 +1,27 @@
1
+ #+TITLE: ChangeLog
2
+ #+AUTHOR: Adolfo Villafiorita
3
+ #+STARTUP: showall
4
+
5
+ * Unreleased
6
+
7
+ These changes are in the repository but have not yet been released to RubyGems.
8
+
9
+ ** New Functions and Changes
10
+
11
+ ** Fixes
12
+
13
+ ** Documentation
14
+
15
+ ** Code
16
+
17
+
18
+ * Version 1.0.0
19
+
20
+ ** New Functions and Changes
21
+
22
+ ** Fixes
23
+
24
+ ** Documentation
25
+
26
+ ** Code
27
+
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in apache_log_report.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Adolfo Villafiorita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.org ADDED
@@ -0,0 +1,30 @@
1
+ #+TITLE: README
2
+ #+AUTHOR: Adolfo Villafiorita
3
+ #+STARTUP: showall
4
+
5
+ * Introduction
6
+
7
+ * Installation
8
+
9
+ * Usage
10
+
11
+ * Change Log
12
+
13
+ See the [[file:CHANGELOG.org][CHANGELOG]] file.
14
+
15
+ * Compatibility
16
+
17
+
18
+ * Author and Contributors
19
+
20
+ [[http://ict4g.net/adolfo][Adolfo Villafiorita]].
21
+
22
+ * Known Bugs
23
+
24
+ Some known bugs and an unknown number of unknown bugs.
25
+
26
+ (See the open issues for the known bugs.)
27
+
28
+ * License
29
+
30
+ Distributed under the terms of the [[http://opensource.org/licenses/MIT][MIT License]].
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/apache_log_report.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ require_relative 'lib/apache_log_report/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "apache_log_report"
5
+ spec.version = ApacheLogReport::VERSION
6
+ spec.authors = ["Adolfo Villafiorita"]
7
+ spec.email = ["adolfo.villafiorita@ict4g.net"]
8
+
9
+ spec.summary = %q{Generate a request report in OrgMode format from an Apache log file.}
10
+ spec.description = %q{Generate a request report in OrgMode format from an Apache log file.}
11
+ spec.homepage = "https://www.ict4g.net/gitea/adolfo/apache_log_report"
12
+ spec.license = "MIT"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
+
15
+ spec.metadata["allowed_push_host"] = "https://rubygems.org/"
16
+
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = "https://www.ict4g.net/gitea/adolfo/apache_log_report"
19
+ spec.metadata["changelog_uri"] = "https://www.ict4g.net/gitea/adolfo/apache_log_report/CHANGELOG.org"
20
+
21
+ # Specify which files should be added to the gem when it is released.
22
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
23
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
24
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ end
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+
30
+ spec.add_dependency "browser"
31
+ spec.add_dependency "sqlite3"
32
+ end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "apache_log_report"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/apache_log_report ADDED
@@ -0,0 +1,627 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'apache_log_report'
4
+ require 'progressbar'
5
+
6
+ LIMIT = 30
7
+
8
+ ##############################################################################
9
+ # MONKEY PATCH ARRAY AND HASH TO BUILD PIPES WHEN COMPUTING STATS
10
+
11
+ class Array
12
+ # counts occurrences of each element in an array and returns a hash { element => count }
13
+ def uniq_with_count
14
+ h = Hash.new(0); self.each { |l| h[l] += 1 }; h
15
+ end
16
+ end
17
+
18
+ class Hash
19
+ # sort and limit entries of a hash. Returns an array which can be output
20
+ # sort by key or value, optionally reverse, optionally return a limited number of items
21
+ # {2014 => 10, 2015 => 20, 2010 => 30} => [[2014, 10], ...]
22
+ def prepare_for_output options = {}
23
+ sorted_a = []
24
+
25
+ if options[:sort] == :key
26
+ sorted_a = self.sort_by { |k, v| k }
27
+ elsif options[:sort]
28
+ sorted_a = self.sort_by { |k, v| v[options[:sort]] }
29
+ else
30
+ sorted_a = self.to_a
31
+ end
32
+
33
+ sorted_a = sorted_a.reverse if options[:reverse]
34
+ sorted_a = sorted_a[0, options[:limit]] if options[:limit]
35
+
36
+ sorted_a
37
+ end
38
+ end
39
+
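# Illustration (not part of the gem source): what the two helpers above return
# for a small, made-up input.
#
#   ["200", "404", "200", "200"].uniq_with_count
#   # => { "200" => 3, "404" => 1 }
#   { "200" => 3, "404" => 1 }.prepare_for_output(sort: :key, reverse: true, limit: 1)
#   # => [["404", 1]]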
40
+ ##############################################################################
41
+ # EMITTERS ARE USED TO OUTPUT TO DIFFERENT FORMATS
42
+
43
+ class OrgTableEmitter
44
+ # print either of:
45
+ # [[ row_title, value, value, value, ...], ...]
46
+ # [[ row_title, {key: value, key: value, key: value}, ...], ...]
47
+ def self.emit table, options = {}
48
+ if table.size == 0
49
+ puts "No values to show"
50
+ return
51
+ end
52
+
53
+ puts "\n\n#{options[:title]}\n\n" if options[:title]
54
+
55
+ puts "#+NAME: #{options[:name]}" if options[:name]
56
+
57
+ # if table is in the form of a hash, transform it into an array
58
+ if table[0][1].class == Hash then
59
+ table = table.map { |x| [ x[0] ] + x[1].values }
60
+ end
61
+
62
+ # get longest row title and then longest column or use 50 and 10 as defaults
63
+ firstcol_length = options[:compact] ? 2 + table.map { |x| x[0].to_s.size }.max : 50
64
+ othercol_length = options[:compact] ? 2 + table.map { |x| x[1..-1].map { |x| x.to_s.size }.max }.max : 10
65
+ # also take into account the headers' lengths
66
+ headers_length = options[:headers] ? 2 + options[:headers][1..-1].map { |x| x.to_s.size }.max : 0
67
+ othercol_length = [othercol_length, headers_length].max
68
+
69
+ # build the formatting string
70
+ col_sizes = [ firstcol_length ] + [othercol_length] * table[0][1..-1].size
71
+ col_classes = table[0].map { |x| x.class.to_s }
72
+ col_formats = col_classes.each_with_index.map { |x, i| format_for(x, col_sizes[i]) }.join("") + "|"
73
+
74
+ # print header if asked to do so
75
+ if options[:headers]
76
+ puts (col_sizes.map { |x| "| %-#{x}s " }.join("") % options[:headers]) + "|"
77
+ puts (col_sizes.map { |x| "|#{"-" * (2 + x)}" }.join("")) + "|"
78
+ end
79
+
80
+ # print each table row
81
+ table.each do |row|
82
+ puts col_formats % row
83
+ end
84
+
85
+ puts "\n"
86
+ end
87
+
88
+ private
89
+
90
+ def self.format_for klass, size
91
+ case klass
92
+ when "String"
93
+ "| %-#{size}s "
94
+ when "Integer"
95
+ "| %#{size}d "
96
+ when "Double"
97
+ "| %#{size}.2f "
98
+ when "Float"
99
+ "| %#{size}.2f "
100
+ else
101
+ "| %#{size}s "
102
+ end
103
+ end
104
+
105
+ end
106
+
107
+
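# Illustration (not part of the gem source): emitting a small, made-up table
# with the class above; column widths are derived from the data and headers.
#
#   OrgTableEmitter.emit [["/index.html", 10], ["/about.html", 3]],
#                        title: "* Example", compact: true, headers: ["Page", "Hits"]
#   # prints an Org heading followed by a table along the lines of:
#   # | Page          | Hits   |
#   # |---------------|--------|
#   # | /index.html   |     10 |
#   # | /about.html   |      3 |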
108
+ ##############################################################################
109
+ # PARSE OPTIONS
110
+
111
+
112
+ ##############################################################################
113
+ # PARSE COMMAND LINE, SET OPTIONS, PERFORM BASIC CHECKS, AND RUN
114
+
115
+ def days log
116
+ (log.last[:date_time].to_date - log.first[:date_time].to_date).to_i
117
+ end
118
+
119
+ options = OptionParser.parse ARGV
120
+
121
+ limit = options[:limit]
122
+ from_date = options[:from_date]
123
+ to_date = options[:to_date]
124
+
125
+ ignore_crawlers = options[:ignore_crawlers]
126
+ only_crawlers = options[:only_crawlers]
127
+ distinguish_crawlers = options[:distinguish_crawlers]
128
+
129
+ no_selfpoll = options[:no_selfpoll]
130
+ prefix = options[:prefix] ? "#{options[:prefix]}" : ""
131
+ suffix = options[:suffix] ? "#{options[:suffix]}" : ""
132
+ log_file = ARGV[0]
133
+
134
+
135
+ if log_file and not File.exist? log_file
136
+ puts "Error: file #{log_file} does not exist"
137
+ exit 1
138
+ end
139
+
140
+ ##############################################################################
141
+ # COMPUTE THE STATISTICS
142
+
143
+ started_at = Time.now
144
+ log = LogParser.parse log_file, options
145
+ days = days(log)
146
+
147
+ log_no_selfpolls = log.select { |x| x[:ip] != "::1" }
148
+
149
+ log_input = no_selfpoll ? log_no_selfpolls : log
150
+
151
+ # get only requested entries
152
+ log_filtered = log_input.select { |x|
153
+ (not from_date or from_date <= x[:date_time]) and
154
+ (not to_date or x[:date_time] <= to_date) and
155
+ (not ignore_crawlers or x[:bot] == false) and
156
+ (not only_crawlers or x[:bot] == true)
157
+ }
158
+ days_filtered = days(log_filtered)
159
+
160
+ printf <<EOS
161
+ #+TITLE: Apache Log Analysis: #{log_file}
162
+ #+DATE: <#{Date.today}>
163
+ #+STARTUP: showall
164
+ #+OPTIONS: ^:{}
165
+ #+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
166
+ #+OPTIONS: html-style:nil
167
+ EOS
168
+
169
+ puts "\n\n* Summary"
170
+
171
+ OrgTableEmitter.emit [ ["Input file", log_file || "stdin"],
172
+ ["Ignore crawlers", options[:ignore_crawlers] == true],
173
+ ["Only crawlers", options[:only_crawlers] == true],
174
+ ["Distinguish crawlers", options[:distinguish_crawlers] == true],
175
+ ["No selfpoll", no_selfpoll],
176
+ ["Filter by date", (options[:from_date] != nil or options[:to_date] != nil)],
177
+ ["Prefix", prefix],
178
+ ["Suffix", suffix]
179
+ ],
180
+ title: "** Log Analysis Request Summary",
181
+ compact: true
182
+
183
+
184
+ OrgTableEmitter.emit [ ["First request", log.first[:date_time]],
185
+ ["Last request", log.last[:date_time]],
186
+ ["Days", days.to_s]
187
+ ],
188
+ title: "** Logging Period",
189
+ compact: true
190
+
191
+
192
+ OrgTableEmitter.emit [ ["First day (filtered)", log_filtered.first[:date_time]],
193
+ ["Last day (filtered)", log_filtered.last[:date_time]],
194
+ ["Days (filtered)", days_filtered.to_s]
195
+ ],
196
+ title: "** Portion Analyzed",
197
+ compact: true
198
+
199
+
200
+ OrgTableEmitter.emit [ ["Log size", log.size],
201
+ ["Self poll entries", log.size - log_no_selfpolls.size],
202
+ ["Entries Parsed", log_input.size],
203
+ ["Entries after filtering", log_filtered.size],
204
+ ],
205
+ title: "** Filtering",
206
+ compact: true,
207
+ name: "size"
208
+
209
+ #
210
+ # hits, unique visitors, and size per day
211
+ # take an array of hashes, group by a lambda function, count hits, visitors, and tx data
212
+ #
213
+ def group_and_count log, key
214
+ matches = log.group_by { |x| key.call(x) }
215
+
216
+ h = {}
217
+
218
+ # each key in matches is an array of hashes (all log entries matching key)
219
+ matches.each do |k, v|
220
+ h[k] = {
221
+ hits: v.size,
222
+ visitors: v.uniq { |x| [ x[:date_time].to_date, x[:ip], x[:user_agent_string] ] }.count,
223
+ tx: v.map { |x| x[:size] }.inject(&:+) / 1024,
224
+ }
225
+ end
226
+
227
+ h
228
+ end
229
+
230
+ # like the previous function, but the count lambda is responsible for returning a hash with the desired data
231
+ # the previous function is equivalent to: group_and_generic_count log, key, lambda { |v| { hits: v.size, visitors: v.uniq ..., tx: v.map ... } }
232
+ def group_and_generic_count log, key, count
233
+ matches = log.group_by { |x| key.call(x) }
234
+
235
+ h = {}
236
+
237
+ # each key in matches is an array of hashes (all log entries matching key)
238
+ matches.each do |k, v|
239
+ h[k] = count.call(v)
240
+ end
241
+
242
+ h
243
+ end
244
+
245
+
246
+ def totals hash
247
+ h = Hash.new
248
+ [:hits, :visitors, :tx].each do |c|
249
+ h[c] = hash.values.map { |x| x[c] }.inject(&:+)
250
+ end
251
+ h
252
+ end
253
+
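# Illustration (not part of the gem source): shape of the data the grouping
# helpers above produce when grouping the filtered log by day (figures made up).
#
#   by_day = group_and_count log_filtered, lambda { |x| x[:date_time].to_date }
#   # => { #<Date 2020-08-18> => { hits: 120, visitors: 35, tx: 5400 },
#   #      #<Date 2020-08-19> => { hits:  98, visitors: 30, tx: 4100 } }
#   totals by_day
#   # => { hits: 218, visitors: 65, tx: 9500 }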
254
+ ##############################################################################
255
+
256
+ table = group_and_count log_filtered, lambda { |x| x[:date_time].to_date }
257
+ totals = totals table
258
+
259
+ OrgTableEmitter.emit [ ["Hits", totals[:hits]],
260
+ ["Unique Visitors", totals[:visitors]],
261
+ ["Hits / Unique Visitor", totals[:hits] / totals[:visitors].to_f],
262
+ ["TX (Kb)", totals[:tx] ],
263
+ ["TX (Kb) / Unique Visitor", totals[:tx] / totals[:visitors]],
264
+ ],
265
+ title: "* Totals",
266
+ name: "totals",
267
+ compact: true
268
+
269
+ if (distinguish_crawlers)
270
+ bot_table = group_and_count log_filtered.select { |x| x[:bot] }, lambda { |x| x[:date_time].to_date }
271
+ bot_totals = totals bot_table
272
+
273
+ OrgTableEmitter.emit [ ["Hits", bot_totals[:hits]],
274
+ ["Unique Visitors", bot_totals[:visitors]],
275
+ ["Hits / Unique Visitor", bot_totals[:hits] / bot_totals[:visitors].to_f],
276
+ ["TX (Kb)", bot_totals[:tx] ],
277
+ ["TX (Kb) / Unique Visitor", bot_totals[:tx] / bot_totals[:visitors]],
278
+ ],
279
+ title: "** Bot Totals",
280
+ name: "bot_totals",
281
+ compact: true
282
+
283
+ vis_table = group_and_count log_filtered.select { |x| not x[:bot] }, lambda { |x| x[:date_time].to_date }
284
+ vis_totals = totals vis_table
285
+
286
+ OrgTableEmitter.emit [ ["Hits", vis_totals[:hits]],
287
+ ["Unique Visitors", vis_totals[:visitors]],
288
+ ["Hits / Unique Visitor", vis_totals[:hits] / vis_totals[:visitors].to_f],
289
+ ["TX (Kb)", vis_totals[:tx] ],
290
+ ["TX (Kb) / Unique Visitor", vis_totals[:tx] / vis_totals[:visitors]],
291
+ ],
292
+ title: "** Visitors Totals",
293
+ name: "vis_totals",
294
+ compact: true
295
+
296
+ end
297
+
298
+ enriched_table = Hash.new
299
+ table.map { |k, v| enriched_table[k] = v.merge({ dow: k.wday, month: k.month }) }
300
+
301
+ OrgTableEmitter.emit enriched_table.prepare_for_output(sort: :key),
302
+ title: "* Daily Distribution",
303
+ compact: true,
304
+ headers: ["Day", "Hits", "Visitors", "Size", "Wday", "Month"],
305
+ name: "daily_distribution"
306
+
307
+ puts <<EOS
308
+ #+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports both :file #{prefix}daily#{suffix}.svg
309
+ reset
310
+ set grid ytics linestyle 0
311
+ set grid xtics linestyle 0
312
+ set terminal svg size 1200,800 fname 'Arial'
313
+
314
+ set xdata time
315
+ set timefmt "%Y-%m-%d"
316
+ set format x "%a, %b %d"
317
+ set xtics rotate by 60 right
318
+
319
+ set title "Hits and Visitors"
320
+ set xlabel "Date"
321
+ set ylabel "Hits"
322
+ set ylabel2 "Visits"
323
+
324
+ set style fill transparent solid 0.2 noborder
325
+
326
+ plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
327
+ data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
328
+ data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
329
+ data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
330
+ data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
331
+ data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
332
+ #+END_SRC
333
+ EOS
334
+
335
+ #
336
+ # distribution per hour
337
+ #
338
+
339
+ table = group_and_count log_filtered, lambda { |x| x[:date_time].hour }
340
+ table_processed = table.prepare_for_output(sort: :key).map { |x|
341
+ ["%02d" % x[0] + ":00"] +
342
+ [ x[1].merge(hits_per_day: x[1][:hits] / days_filtered,
343
+ visitors_per_day: x[1][:visitors] / days_filtered,
344
+ tx_per_day: x[1][:tx] / days_filtered) ] }
345
+
346
+ OrgTableEmitter.emit table_processed,
347
+ title: "* Time Distribution",
348
+ compact: true,
349
+ headers: ["Time", "Hits", "Visitors", "Size (Kb)", "Hits/Day", "Visit/Day", "Size (Kb)/Day"],
350
+ name: "time_distribution"
351
+
352
+ puts <<EOS
353
+ #+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time#{suffix}.svg
354
+ reset
355
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
356
+
357
+ set grid ytics linestyle 0
358
+
359
+ set title "Hits and Visitors"
360
+ set xlabel "Date"
361
+ set ylabel "Hits and Visits"
362
+
363
+ set style fill solid 0.25
364
+ set boxwidth 0.6
365
+
366
+ set style data histograms
367
+ set style histogram clustered gap 1
368
+
369
+ plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
370
+ data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
371
+ data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
372
+ data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
373
+ #+END_SRC
374
+
375
+ EOS
376
+
377
+ puts <<EOS
378
+ #+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time-traffic#{suffix}.svg
379
+ reset
380
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
381
+
382
+ set grid ytics linestyle 0
383
+
384
+ set title "Traffic"
385
+ set xlabel "Date"
386
+ set ylabel "Traffic"
387
+
388
+ set style fill solid 0.50
389
+ set boxwidth 0.6
390
+
391
+ set style data histograms
392
+ set style histogram clustered gap 1
393
+
394
+ plot data using 2:xtic(1) lc rgb "#00AA00" title "Traffic", \\
395
+ data using ($0):($2 + 10):2 with labels title "" textcolor rgb("#00AA00")
396
+ #+END_SRC
397
+
398
+ EOS
399
+
400
+ #
401
+ # most requested pages
402
+ #
403
+
404
+ log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] == ".html" }
405
+ table = group_and_count log_success, lambda { |x| x[:uri] }
406
+
407
+ OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
408
+ title: "* Most Requested Pages",
409
+ compact: true,
410
+ headers: ["Page", "Hits", "Visitors", "Size"],
411
+ name: "pages"
412
+
413
+ puts "Total of #{table.size} entries."
414
+
415
+ #
416
+ # most requested URIs
417
+ #
418
+
419
+ log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] != ".html" }
420
+ table = group_and_count log_success, lambda { |x| x[:uri] }
421
+
422
+ OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
423
+ title: "* Most Requested URIs",
424
+ compact: true,
425
+ headers: ["URI", "Hits", "Visitors", "Size"],
426
+ name: "uris"
427
+
428
+ puts "Total of #{table.size} entries."
429
+
430
+ #
431
+ # 404s (Pages)
432
+ #
433
+
434
+ table = log_filtered.select { |x| x[:status] == "404" and x[:type] == ".html" }.map { |x| x[:uri] }.uniq_with_count
435
+
436
+ OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
437
+ title: "* HTML 404s",
438
+ compact: true,
439
+ headers: ["Page", "Misses"],
440
+ name: "page_miss"
441
+
442
+ puts "Total of #{table.size} entries."
443
+
444
+ #
445
+ # 404s URIs
446
+ #
447
+
448
+ table = log_filtered.select { |x| x[:status] == "404" and x[:type] != ".html" }.map { |x| x[:uri] }.uniq_with_count
449
+
450
+ OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
451
+ title: "* URI 404s",
452
+ compact: true,
453
+ headers: ["URI", "Misses"],
454
+ name: "uri_miss"
455
+
456
+ puts "Total of #{table.size} entries."
457
+
458
+ #
459
+ # Attacks
460
+ #
461
+ def reasonable_response_type ext
462
+ [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].include? ext.downcase
463
+ end
464
+
465
+ table = log_filtered.select { |x| x[:status] != "200" and not reasonable_response_type(x[:type]) }.map { |x| x[:uri] }.uniq_with_count
466
+
467
+ OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
468
+ title: "* Possible Attacks",
469
+ compact: true,
470
+ headers: ["Request", "Count"],
471
+ name: "attacks"
472
+
473
+ puts "Total of #{table.size} entries."
474
+
475
+ #
476
+ # IPs
477
+ #
478
+
479
+ table = group_and_count log_success, lambda { |x| x[:ip] }
480
+
481
+ OrgTableEmitter.emit table.prepare_for_output(sort: :key, reverse: true, limit: limit),
482
+ title: "* IPs",
483
+ compact: true,
484
+ headers: ["IP", "Hits", "Visitors", "Size"],
485
+ name: "ips"
486
+
487
+ puts "Total of #{table.size} entries."
488
+
489
+ #
490
+ # Statuses, Browsers and Platforms
491
+ #
492
+
493
+ [:status, :browser, :platform].each do |what|
494
+
495
+ result = log_filtered.map { |x| x[what] }.uniq_with_count
496
+
497
+ OrgTableEmitter.emit result.prepare_for_output(sort: :key),
498
+ title: "* #{what.to_s.capitalize}",
499
+ compact: true,
500
+ headers: [what.to_s.capitalize, "Hits"],
501
+ name: what.to_s
502
+
503
+ puts <<EOS
504
+ #+BEGIN_SRC gnuplot :var data = #{what.to_s} :results output :exports both :file #{prefix}#{what.to_s}#{suffix}.svg
505
+ reset
506
+ set grid ytics linestyle 0
507
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
508
+
509
+ set style fill solid 0.25
510
+ set boxwidth 0.6
511
+
512
+ plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
513
+ data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
514
+ #+END_SRC
515
+ EOS
516
+
517
+ end
518
+
519
+ #
520
+ # Statuses by day
521
+ #
522
+ result = group_and_generic_count log_filtered,
523
+ lambda { |x| x[:date_time].to_date },
524
+ lambda { |x| h = Hash.new;
525
+ h["4xx"] = x.select { |y| y[:status][0] == "4" }.count;
526
+ h["3xx"] = x.select { |y| y[:status][0] == "3" }.count;
527
+ h["2xx"] = x.select { |y| y[:status][0] == "2" }.count;
528
+ h }
529
+
530
+ OrgTableEmitter.emit result.prepare_for_output(sort: :key),
531
+ title: "* Daily Status",
532
+ compact: true,
533
+ headers: ["Day", "4xx", "3xx", "2xx"],
534
+ name: "daily_statuses"
535
+
536
+ puts <<EOS
537
+ #+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports both :file #{prefix}daily-statuses#{suffix}.svg
538
+ reset
539
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
540
+
541
+ set grid ytics linestyle 0
542
+
543
+ set title "Daily Statuses"
544
+ set xlabel "Date"
545
+ set ylabel "Number of Hits"
546
+ set xtics rotate by 60 right
547
+
548
+ set style fill solid 0.25
549
+ set boxwidth 0.6
550
+
551
+ set style data histograms
552
+ set style histogram clustered gap 1
553
+
554
+ plot data using 2:xtic(1) lc rgb "#CC0000" title "4xx", \\
555
+ data using 3 lc rgb "#0000CC" title "3xx", \\
556
+ data using 4 lc rgb "#00AA00" title "2xx", \\
557
+ data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#CC0000"), \\
558
+ data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
559
+ data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#00AA00")
560
+ #+END_SRC
561
+
562
+ EOS
563
+
564
+ #
565
+ # Referer
566
+ #
567
+ result = group_and_count log_filtered, lambda { |x| begin
568
+ URI(x[:referer]).host
569
+ rescue StandardError
570
+ ""
571
+ end }
572
+ good_result = result.reject { |k| k == nil }
573
+
574
+ OrgTableEmitter.emit good_result.prepare_for_output(sort: :key),
575
+ title: "* Referer",
576
+ compact: true,
577
+ headers: ["Referer", "Hits", "Visitors", "Size"],
578
+ name: "referers"
579
+
580
+ puts <<EOS
581
+ #+BEGIN_SRC gnuplot :var data = referers :results output :exports both :file #{prefix}referers#{suffix}.svg
582
+ reset
583
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
584
+
585
+ set grid ytics linestyle 0
586
+ set grid xtics linestyle 0
587
+
588
+ set title "Referers"
589
+ set xlabel "Date"
590
+ set xtics rotate by 60 right
591
+ set ylabel "Hits and Visits"
592
+
593
+ set style fill solid 0.45
594
+ set boxwidth 0.7
595
+
596
+ set style data histograms
597
+ set style histogram clustered gap 1
598
+
599
+ plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
600
+ data using 3 lc rgb "#0AAAA0" title "Visits", \\
601
+ data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
602
+ data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
603
+ #+END_SRC
604
+ EOS
605
+
606
+ puts <<EOS
607
+ * Local Variables :noexport:
608
+ # Local Variables:
609
+ # org-confirm-babel-evaluate: nil
610
+ # org-display-inline-images: t
611
+ # end:
612
+ EOS
613
+
614
+ ended_at = Time.now
615
+ duration = ended_at - started_at
616
+
617
+ puts <<EOS
618
+ ** Performance
619
+
620
+ | Analysis started at | #{started_at.to_s} |
621
+ | Analysis ended at | #{ended_at.to_s} |
622
+ | Duration (sec) | #{"%.3f" % duration } |
623
+ | Duration (min) | #{"%.3f" % (duration / 60 )} |
624
+ | Log size | #{log.size} |
625
+ | Entries Parsed | #{log_input.size} |
626
+ | Lines/sec | #{log_input.size / duration} |
627
+ EOS
data/lib/apache_log_report.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'apache_log_report/log_parser_sqlite3.rb'
2
+ require 'apache_log_report/option_parser.rb'
3
+ require 'apache_log_report/version.rb'
4
+
data/lib/apache_log_report/log_parser_hash.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'date'
2
+ require 'browser'
3
+
4
+ class LogParserHash
5
+ # make a matchdata into a Hash.
6
+ # pure magic gotten from: http://zetcode.com/db/sqliteruby/connect/
7
+ # Used during parsing to simplify the generation of the hash.
8
+ class ::MatchData # reopen the top-level MatchData so the patch applies to regexp match results
9
+ def to_h
10
+ names.map(&:intern).zip(captures).to_h
11
+ end
12
+ end
13
+
14
+ def parse filename, options = {}
15
+ progressbar = ProgressBar.create(output: $stderr)
16
+
17
+ content = filename ? File.readlines(filename) : ARGF.readlines
18
+ progressbar.total = content.size
19
+
20
+ # We parse combined log, which looks like:
21
+ #
22
+ # 66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] "GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"
23
+ # 178.172.20.114 - - [25/Aug/2020:17:13:21 +0200] "GET /favicon.ico HTTP/1.1" 404 196 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
24
+ # entries that do not match this format are skipped
25
+ combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<timestamp>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
26
+
27
+ content.collect { |line|
28
+ hashie = combined_regexp.match line
29
+ hash = hashie.to_h
30
+
31
+ progressbar.increment
32
+
33
+ if hash != {}
34
+ hash[:date_time] = DateTime.parse(hash[:timestamp].sub(":", " "))
35
+ hash[:size] = hash[:size].to_i
36
+ hash[:type] = hash[:uri] ? File.extname(hash[:uri]) : ""
37
+
38
+ ua = Browser.new(hash[:user_agent_string], accept_language: "en-us")
39
+ hash[:bot] = ua.bot?
40
+ hash[:browser] = ua.name || ""
41
+ hash[:browser_version] = ua.version || ""
42
+ hash[:platform] = ua.platform.name || ""
43
+ hash[:platform_version] = ua.platform.version || ""
44
+
45
+ hash
46
+ end
47
+ }.compact
48
+ end
49
+ end
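A sketch of what the combined-log regexp captures for the first sample line quoted in the comments above (illustration only, not part of the gem; it assumes combined_regexp holds the regexp defined in parse):

line = '66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] ' \
       '"GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"'
combined_regexp.match(line).named_captures
# => { "ip" => "66.249.70.16", "remote_log_name" => "-",
#      "timestamp" => "18/Aug/2020:23:03:00 +0200", "method" => "GET",
#      "uri" => "/eatc/assets/images/team/gunde.png", "status" => "200",
#      "size" => "61586", "referer" => "-", "user_agent_string" => "Googlebot-Image/1.0" }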
data/lib/apache_log_report/log_parser_sqlite3.rb ADDED
@@ -0,0 +1,99 @@
1
+ #
2
+ # SQLITE3
3
+ #
4
+ require 'sqlite3'
5
+
6
+ class LogParser
7
+ def self.parse filename, options = {}
8
+
9
+ progressbar = ProgressBar.create(output: $stderr)
10
+
11
+ content = filename ? File.readlines(filename) : ARGF.readlines
12
+ progressbar.total = content.size
13
+
14
+ db = SQLite3::Database.new ":memory:"
15
+ db.execute "CREATE TABLE IF NOT EXISTS LogLine(
16
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
17
+ date_time TEXT,
18
+ ip TEXT,
19
+ remote_log_name TEXT,
20
+ method TEXT,
21
+ uri TEXT,
22
+ status TEXT,
23
+ size INTEGER,
24
+ referer TEXT,
25
+ user_agent_string TEXT,
26
+ bot INTEGER,
27
+ browser TEXT,
28
+ browser_version TEXT,
29
+ platform TEXT,
30
+ platform_version TEXT
31
+ )"
32
+
33
+ ins = db.prepare('insert into LogLine (
34
+ date_time,
35
+ ip,
36
+ remote_log_name,
37
+ method,
38
+ uri,
39
+ status,
40
+ size,
41
+ referer,
42
+ user_agent_string,
43
+ bot,
44
+ browser,
45
+ browser_version,
46
+ platform,
47
+ platform_version)
48
+ values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
49
+
50
+ combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<datetime>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/
51
+
52
+ content.collect { |line|
53
+ hashie = combined_regexp.match line
54
+
55
+ progressbar.increment
56
+
57
+ if hashie
59
+ ua = Browser.new(hashie[:user_agent_string], accept_language: "en-us")
60
+ puts <<EOS
61
+ #{hashie[:datetime].sub(":", " ")},
62
+ #{hashie[:ip]},
63
+ #{hashie[:remote_log_name]},
64
+ #{hashie[:method]},
65
+ #{hashie[:uri]},
66
+ #{ hashie[:status]},
67
+ #{ hashie[:size].to_i},
68
+ #{ hashie[:referer]},
69
+ #{ hashie[:user_agent_string]},
70
+ #{ ua.bot? ? 1 : 0},
71
+ #{ ua.name || ""},
72
+ #{ ua.version || ""},
73
+ #{ ua.platform.name || ""},
74
+ #{ ua.platform.version || ""}
75
+ EOS
76
+
77
+ ins.execute(
78
+ hashie[:datetime].sub(":", " "),
79
+ hashie[:ip],
80
+ hashie[:remote_log_name],
81
+ hashie[:method],
82
+ hashie[:uri],
83
+ hashie[:status],
84
+ hashie[:size].to_i,
85
+ hashie[:referer],
86
+ hashie[:user_agent_string],
87
+ ua.bot? ? 1 : 0,
88
+ (ua.name || ""),
89
+ (ua.version || ""),
90
+ (ua.platform.name || ""),
91
+ (ua.platform.version || "")
92
+ )
93
+ end
94
+ }
95
+
96
+ db
97
+ end
98
+ end
99
+
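LogParser.parse above returns the in-memory SQLite database itself, so it can be queried directly. A brief sketch (illustration only, not part of the gem; counts are invented, and the progressbar and browser gems must be loaded, as they are when run via the executable):

db = LogParser.parse("access.log")
db.execute("SELECT status, COUNT(*) FROM LogLine GROUP BY status")
# => [["200", 1532], ["301", 12], ["404", 87]]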
data/lib/apache_log_report/option_parser.rb ADDED
@@ -0,0 +1,63 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+
4
+ class OptionParser
5
+ def self.parse(options)
6
+ args = {}
7
+
8
+ opt_parser = OptionParser.new do |opts|
9
+ opts.banner = "Usage: apache_log_report [options] logfile"
10
+
11
+ opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{LIMIT})") do |n|
12
+ args[:limit] = n
13
+ end
14
+
15
+ opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
16
+ args[:from_date] = n
17
+ end
18
+
19
+ opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
20
+ args[:to_date] = n
21
+ end
22
+
23
+ opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do |n|
24
+ args[:ignore_crawlers] = true
25
+ end
26
+
27
+ opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do |n|
28
+ args[:only_crawlers] = true
29
+ end
30
+
31
+ opts.on("-t", "--distinguish-crawlers", "Print totals distinguishing crawlers from visitors") do |n|
32
+ args[:distinguish_crawlers] = true
33
+ end
34
+
35
+ opts.on("-p", "--ignore-selfpoll", "Ignore Apache's self-poll entries (from ::1)") do |n|
36
+ args[:no_selfpoll] = true
37
+ end
38
+
39
+ opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
40
+ args[:prefix] = n
41
+ end
42
+
43
+ opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
44
+ args[:suffix] = n
45
+ end
46
+
47
+ opts.on("-h", "--help", "Prints this help") do
48
+ puts opts
49
+ exit
50
+ end
51
+ end
52
+
53
+ opt_parser.parse!(options)
54
+
55
+ args[:limit] ||= LIMIT
56
+ args[:ignore_crawlers] ||= false
57
+ args[:only_crawlers] ||= false
58
+ args[:distinguish_crawlers] ||= false
59
+ args[:no_selfpoll] ||= false
60
+
61
+ return args
62
+ end
63
+ end
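A sketch of how the option parser above is driven (illustration only, not part of the gem; it assumes the LIMIT constant is defined, as it is in exe/apache_log_report):

options = OptionParser.parse ["--limit", "10", "--ignore-crawlers", "access.log"]
# => { :limit => 10, :ignore_crawlers => true, :only_crawlers => false,
#      :distinguish_crawlers => false, :no_selfpoll => false }
# The non-option argument "access.log" is left in the array for the caller to read (ARGV[0] in the executable).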
data/lib/apache_log_report/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module ApacheLogReport
2
+ VERSION = "0.9.0"
3
+ end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apache_log_report
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Adolfo Villafiorita
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: browser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: sqlite3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Generate a request report in OrgMode format from an Apache log file.
42
+ email:
43
+ - adolfo.villafiorita@ict4g.net
44
+ executables:
45
+ - apache_log_report
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - CHANGELOG.org
51
+ - Gemfile
52
+ - LICENSE.txt
53
+ - README.org
54
+ - Rakefile
55
+ - apache_log_report.gemspec
56
+ - bin/console
57
+ - bin/setup
58
+ - exe/apache_log_report
59
+ - lib/apache_log_report.rb
60
+ - lib/apache_log_report/log_parser_hash.rb
61
+ - lib/apache_log_report/log_parser_sqlite3.rb
62
+ - lib/apache_log_report/option_parser.rb
63
+ - lib/apache_log_report/version.rb
64
+ homepage: https://www.ict4g.net/gitea/adolfo/apache_log_report
65
+ licenses:
66
+ - MIT
67
+ metadata:
68
+ allowed_push_host: https://rubygems.org/
69
+ homepage_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report
70
+ source_code_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report
71
+ changelog_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report/CHANGELOG.org
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 2.3.0
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubygems_version: 3.1.2
88
+ signing_key:
89
+ specification_version: 4
90
+ summary: Generate a request report in OrgMode format from an Apache log file.
91
+ test_files: []