apache_log_report 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 911c11b89b21140783332a66419e442ed366debad950a430af6265a5cc800129
4
+ data.tar.gz: 0f4d3bf245a2436412d7419d724c0c996d7ea0c6057a847f24cde376e6cf13c5
5
+ SHA512:
6
+ metadata.gz: eed37e0f7c2098e2ca6192760ca5c9722ada8c0a5259d3c52fd9ae382b32a0d39f3da3e37bbcc9ab2446d92e35c3b538c1037ac48a4364f8d05ec2c58ab54fac
7
+ data.tar.gz: 563e8a68028d905bd8389ceefff7ed02cdb82715426fa1e2d5b47feeb471d75034e59b948a04899801e6d7b708f0dca7ff385111d163aef273b91f5c8e3bf2f4
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ *~
@@ -0,0 +1,27 @@
1
+ #+TITLE: ChangeLog
2
+ #+AUTHOR: Adolfo Villafiorita
3
+ #+STARTUP: showall
4
+
5
+ * Unreleased
6
+
7
+ These changes are in the repository but have not yet been released to RubyGems.
8
+
9
+ ** New Functions and Changes
10
+
11
+ ** Fixes
12
+
13
+ ** Documentation
14
+
15
+ ** Code
16
+
17
+
18
+ * Version 1.0.0
19
+
20
+ ** New Functions and Changes
21
+
22
+ ** Fixes
23
+
24
+ ** Documentation
25
+
26
+ ** Code
27
+
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in apache_log_report.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Adolfo Villafiorita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,30 @@
1
+ #+TITLE: README
2
+ #+AUTHOR: Adolfo Villafiorita
3
+ #+STARTUP: showall
4
+
5
+ * Introduction
6
+
7
+ * Installation
8
+
9
+ * Usage
10
+
11
+ * Change Log
12
+
13
+ See the [[file:CHANGELOG.org][CHANGELOG]] file.
14
+
15
+ * Compatibility
16
+
17
+
18
+ * Author and Contributors
19
+
20
+ [[http://ict4g.net/adolfo][Adolfo Villafiorita]].
21
+
22
+ * Known Bugs
23
+
24
+ Some known bugs and an unknown number of unknown bugs.
25
+
26
+ (See the open issues for the known bugs.)
27
+
28
+ * License
29
+
30
+ Distributed under the terms of the [[http://opensource.org/licenses/MIT][MIT License]].
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
@@ -0,0 +1,32 @@
1
+ require_relative 'lib/apache_log_report/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "apache_log_report"
5
+ spec.version = ApacheLogReport::VERSION
6
+ spec.authors = ["Adolfo Villafiorita"]
7
+ spec.email = ["adolfo.villafiorita@ict4g.net"]
8
+
9
+ spec.summary = %q{Generate a request report in OrgMode format from an Apache log file.}
10
+ spec.description = %q{Generate a request report in OrgMode format from an Apache log file.}
11
+ spec.homepage = "https://www.ict4g.net/gitea/adolfo/apache_log_report"
12
+ spec.license = "MIT"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
+
15
+ spec.metadata["allowed_push_host"] = "https://rubygems.org/"
16
+
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = "https://www.ict4g.net/gitea/adolfo/apache_log_report"
19
+ spec.metadata["changelog_uri"] = "https://www.ict4g.net/gitea/adolfo/apache_log_report/CHANGELOG.org"
20
+
21
+ # Specify which files should be added to the gem when it is released.
22
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
23
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
24
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ end
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+
30
+ spec.add_dependency "browser"
31
+ spec.add_dependency "sqlite3"
32
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "apache_log_report"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,627 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'apache_log_report'
4
+ require 'progressbar'
5
+
6
+ LIMIT = 30
7
+
8
+ ##############################################################################
9
+ # MONKEY PATCH ARRAY AND HASH TO BUILD PIPES WHEN COMPUTING STATS
10
+
11
class Array
  # Tallies the elements of the receiver.
  #
  # Returns a Hash of the form { element => number_of_occurrences }.
  # The hash is created with a default value of 0, so looking up an
  # element that never occurred yields 0.
  def uniq_with_count
    each_with_object(Hash.new(0)) do |element, counts|
      counts[element] += 1
    end
  end
end
17
+
18
class Hash
  # Sorts and limits the entries of a hash and returns them as an array of
  # [key, value] pairs which can be handed to an emitter.
  #
  #   {2014 => 10, 2015 => 20, 2010 => 30} => [[2014, 10], ...]
  #
  # Recognized options:
  #
  # :sort::    +:key+ sorts by key. Any other truthy value sorts by value:
  #            a numeric value (e.g. a plain count) is compared directly,
  #            anything else is indexed with the given sort key
  #            (e.g. +sort: :hits+ on <tt>{ hits: 3, visitors: 1 }</tt>).
  #            When omitted, entries keep the hash's own order.
  # :reverse:: when truthy, the resulting order is reversed.
  # :limit::   when set, only the first N entries are returned.
  def prepare_for_output options = {}
    sort = options[:sort]

    pairs =
      if sort == :key
        sort_by { |key, _value| key }
      elsif sort
        # Numeric values must not be indexed: Integer#[] is a bit reference,
        # so count[0] would order the entries by parity instead of by count.
        sort_by { |_key, value| value.is_a?(Numeric) ? value : value[sort] }
      else
        to_a
      end

    pairs = pairs.reverse if options[:reverse]
    pairs = pairs[0, options[:limit]] if options[:limit]

    pairs
  end
end
39
+
40
+ ##############################################################################
41
+ # EMITTERS ARE USED TO OUTPUT TO DIFFERENT FORMATS
42
+
43
# Prints a table to standard output as an OrgMode table.
class OrgTableEmitter
  # Prints +table+, which is either of:
  #
  #   [[ row_title, value, value, value, ...], ...]
  #   [[ row_title, {key: value, key: value, key: value}], ...]
  #
  # Recognized options:
  #
  # :title::   text printed (surrounded by blank lines) before the table
  # :name::    emitted as an org "#+NAME:" line, so that source blocks can
  #            reference the table
  # :headers:: array of column titles; must have one entry per column
  # :compact:: when truthy, column widths are derived from the content,
  #            otherwise 50 (first column) and 10 (other columns) are used
  #
  # An empty table only prints "No values to show".
  def self.emit table, options = {}
    if table.size == 0
      puts "No values to show"
      return
    end

    puts "\n\n#{options[:title]}\n\n" if options[:title]

    puts "#+NAME: #{options[:name]}" if options[:name]

    # if the rows carry their values in a hash, flatten them into arrays
    if table[0][1].class == Hash
      table = table.map { |row| [row[0]] + row[1].values }
    end

    headers = options[:headers]

    # longest row title and longest cell (plus padding), or 50 and 10 as defaults;
    # `.max.to_i` keeps rows without value columns from producing `2 + nil`
    firstcol_length = options[:compact] ? 2 + table.map { |row| row[0].to_s.size }.max : 50
    othercol_length = options[:compact] ? 2 + table.map { |row| row[1..-1].map { |cell| cell.to_s.size }.max.to_i }.max : 10
    # take into account the length of the headers as well
    headers_length = headers ? 2 + headers[1..-1].map { |header| header.to_s.size }.max.to_i : 0
    othercol_length = [othercol_length, headers_length].max

    col_sizes = [firstcol_length] + [othercol_length] * table[0][1..-1].size

    # print header if asked to do so
    if headers
      puts format(col_sizes.map { |size| "| %-#{size}s " }.join, *headers) + "|"
      puts col_sizes.map { |size| "|#{"-" * (2 + size)}" }.join + "|"
    end

    # Each cell is formatted according to its own class. Deriving the format
    # from the first row only would push a Float that follows an Integer
    # through "%d" and silently truncate it.
    table.each do |row|
      cells = row.each_with_index.map do |cell, index|
        format(format_for(cell.class.to_s, col_sizes[index] || othercol_length), cell)
      end
      puts cells.join + "|"
    end

    puts "\n"
  end

  # Returns the format string for a cell of class +klass+ (a class name)
  # padded to +size+ characters: strings are left aligned, integers and
  # floats right aligned (floats with two decimals), anything else is
  # right aligned through "%s".
  def self.format_for klass, size
    case klass
    when "String"
      "| %-#{size}s "
    when "Integer", "Fixnum", "Bignum" # Fixnum/Bignum: integer classes before Ruby 2.4
      "| %#{size}d "
    when "Float"
      "| %#{size}.2f "
    else
      "| %#{size}s "
    end
  end
  # a bare `private` does not apply to methods defined with `def self.`
  private_class_method :format_for
end
106
+
107
+
108
+ ##############################################################################
109
+ # PARSE OPTIONS
110
+
111
+
112
+ ##############################################################################
113
+ # PARSE COMMAND LINE, SETS OPTIONS, PERFORM BASIC CHECKS, AND RUN
114
+
115
# Number of days between the first and the last entry of +log+.
#
# +log+ is an array of entries whose :date_time value responds to +to_date+;
# the entries are expected to be in chronological order. Returns an Integer
# (0 when all entries fall on the same day, or when the log is empty).
def days log
  # nothing to measure: avoid calling [] on the nil returned by first/last
  return 0 if log.nil? || log.empty?

  (log.last[:date_time].to_date - log.first[:date_time].to_date).to_i
end
118
+
119
+ options = OptionParser.parse ARGV
120
+
121
+ limit = options[:limit]
122
+ from_date = options[:from_date]
123
+ to_date = options[:to_date]
124
+
125
+ ignore_crawlers = options[:ignore_crawlers]
126
+ only_crawlers = options[:only_crawlers]
127
+ distinguish_crawlers = options[:distinguish_crawlers]
128
+
129
+ no_selfpoll = options[:no_selfpoll]
130
+ prefix = options[:prefix] ? "#{options[:prefix]}" : ""
131
+ suffix = options[:suffix] ? "#{options[:suffix]}" : ""
132
+ log_file = ARGV[0]
133
+
134
+
135
+ if log_file and not File.exist? log_file
136
+ puts "Error: file #{log_file} does not exist"
137
+ exit 1
138
+ end
139
+
140
+ ##############################################################################
141
+ # COMPUTE THE STATISTICS
142
+
143
+ started_at = Time.now
144
+ log = LogParser.parse log_file, options
145
+ days = days(log)
146
+
147
# entries which are not Apache self polls (requests coming from ::1)
log_no_selfpolls = log.reject { |x| x[:ip] == "::1" }

# --ignore-selfpoll sets no_selfpoll: when it is given the self poll entries
# are dropped, otherwise the whole log is analyzed
log_input = no_selfpoll ? log_no_selfpolls : log
150
+
151
+ # get only requested entries
152
+ log_filtered = log_input.select { |x|
153
+ (not from_date or from_date <= x[:date_time]) and
154
+ (not to_date or x[:date_time] <= to_date) and
155
+ (not ignore_crawlers or x[:bot] == false) and
156
+ (not only_crawlers or x[:bot] == true)
157
+ }
158
+ days_filtered = days(log_filtered)
159
+
160
+ printf <<EOS
161
+ #+TITLE: Apache Log Analysis: #{log_file}
162
+ #+DATE: <#{Date.today}>
163
+ #+STARTUP: showall
164
+ #+OPTIONS: ^:{}
165
+ #+HTML_HEAD: <link rel="stylesheet" type="text/css" href="ala-style.css" />
166
+ #+OPTIONS: html-style:nil
167
+ EOS
168
+
169
+ puts "\n\n* Summary"
170
+
171
+ OrgTableEmitter.emit [ ["Input file", log_file || "stdin"],
172
+ ["Ignore crawlers", options[:ignore_crawlers] == true],
173
+ ["Only crawlers", options[:only_crawlers] == true],
174
+ ["Distinguish crawlers", options[:distinguish_crawlers] == true],
175
+ ["No selfpoll", no_selfpoll],
176
+ ["Filter by date", (options[:from_date] != nil or options[:to_date] != nil)],
177
+ ["Prefix", prefix],
178
+ ["Suffix", suffix]
179
+ ],
180
+ title: "** Log Analysis Request Summary",
181
+ compact: true
182
+
183
+
184
+ OrgTableEmitter.emit [ ["First request", log.first[:date_time]],
185
+ ["Last request", log.last[:date_time]],
186
+ ["Days", days.to_s]
187
+ ],
188
+ title: "** Logging Period",
189
+ compact: true
190
+
191
+
192
+ OrgTableEmitter.emit [ ["First day (filtered)", log_filtered.first[:date_time]],
193
+ ["Last day (filtered)", log_filtered.last[:date_time]],
194
+ ["Days (filtered)", days_filtered.to_s]
195
+ ],
196
+ title: "** Portion Analyzed",
197
+ compact: true
198
+
199
+
200
+ OrgTableEmitter.emit [ ["Log size", log.size],
201
+ ["Self poll entries", log.size - log_no_selfpolls.size],
202
+ ["Entries Parsed", log_input.size],
203
+ ["Entries after filtering", log_filtered.size],
204
+ ],
205
+ title: "** Filtering",
206
+ compact: true,
207
+ name: "size"
208
+
209
+ #
210
+ # hits, unique visitors, and size per day
211
+ # take an array of hashes, group by a lambda function, count hits, visitors, and tx data
212
+ #
213
# Groups the entries of +log+ (an array of hashes) by the value returned by
# the +key+ lambda and, for each group, computes:
#
# hits::     number of entries in the group
# visitors:: number of distinct (day, ip, user agent string) triples
# tx::       sum of the :size of the entries divided by 1024 (integer
#            division); entries without a size count as 0
#
# Returns a hash { group_key => { hits: ..., visitors: ..., tx: ... } }.
def group_and_count log, key
  count = lambda do |entries|
    {
      hits: entries.size,
      visitors: entries.uniq { |x| [x[:date_time].to_date, x[:ip], x[:user_agent_string]] }.count,
      tx: entries.inject(0) { |sum, x| sum + x[:size].to_i } / 1024,
    }
  end

  group_and_generic_count log, key, count
end

# Like group_and_count, but the +count+ lambda is responsible for computing
# the data of each group: it receives the array of entries of one group and
# its return value is stored under the group's key.
#
# group_and_count is the special case:
#   group_and_generic_count log, key, lambda { |v| { hits: v.size, visitors: v.uniq ..., tx: v.map ... } }
def group_and_generic_count log, key, count
  # each value yielded by group_by is the array of all entries sharing a key
  log.group_by { |x| key.call(x) }.each_with_object({}) do |(group, entries), result|
    result[group] = count.call(entries)
  end
end
244
+
245
+
246
# Sums the :hits, :visitors and :tx figures over all the values of +hash+
# (a hash of the form { key => { hits: ..., visitors: ..., tx: ... } }).
#
# Returns { hits: total, visitors: total, tx: total }; every total is 0
# (rather than nil) when +hash+ is empty.
def totals hash
  [:hits, :visitors, :tx].each_with_object({}) do |column, sums|
    sums[column] = hash.values.map { |x| x[column] }.inject(0, :+)
  end
end
253
+
254
+ ##############################################################################
255
+
256
+ table = group_and_count log_filtered, lambda { |x| x[:date_time].to_date }
257
+ totals = totals table
258
+
259
+ OrgTableEmitter.emit [ ["Hits", totals[:hits]],
260
+ ["Unique Visitors", totals[:visitors]],
261
+ ["Hits / Unique Visitor", totals[:hits] / totals[:visitors].to_f],
262
+ ["TX (Kb)", totals[:tx] ],
263
+ ["TX (Kb) / Unique Visitor", totals[:tx] / totals[:visitors]],
264
+ ],
265
+ title: "* Totals",
266
+ name: "totals",
267
+ compact: true
268
+
269
+ if (distinguish_crawlers)
270
+ bot_table = group_and_count log_filtered.select { |x| x[:bot] }, lambda { |x| x[:date_time].to_date }
271
+ bot_totals = totals bot_table
272
+
273
+ OrgTableEmitter.emit [ ["Hits", bot_totals[:hits]],
274
+ ["Unique Visitors", bot_totals[:visitors]],
275
+ ["Hits / Unique Visitor", bot_totals[:hits] / bot_totals[:visitors].to_f],
276
+ ["TX (Kb)", bot_totals[:tx] ],
277
+ ["TX (Kb) / Unique Visitor", bot_totals[:tx] / bot_totals[:visitors]],
278
+ ],
279
+ title: "** Bot Totals",
280
+ name: "bot_totals",
281
+ compact: true
282
+
283
+ vis_table = group_and_count log_filtered.select { |x| not x[:bot] }, lambda { |x| x[:date_time].to_date }
284
+ vis_totals = totals vis_table
285
+
286
+ OrgTableEmitter.emit [ ["Hits", vis_totals[:hits]],
287
+ ["Unique Visitors", vis_totals[:visitors]],
288
+ ["Hits / Unique Visitor", vis_totals[:hits] / vis_totals[:visitors].to_f],
289
+ ["TX (Kb)", vis_totals[:tx] ],
290
+ ["TX (Kb) / Unique Visitor", vis_totals[:tx] / vis_totals[:visitors]],
291
+ ],
292
+ title: "** Visitors Totals",
293
+ name: "vis_totals",
294
+ compact: true
295
+
296
+ end
297
+
298
+ enriched_table = Hash.new
299
+ table.map { |k, v| enriched_table[k] = v.merge({ dow: k.wday, month: k.month }) }
300
+
301
+ OrgTableEmitter.emit enriched_table.prepare_for_output(sort: :key),
302
+ title: "* Daily Distribution",
303
+ compact: true,
304
+ headers: ["Day", "Hits", "Visitors", "Size", "Wday", "Month"],
305
+ name: "daily_distribution"
306
+
307
+ puts <<EOS
308
+ #+BEGIN_SRC gnuplot :var data = daily_distribution :results output :exports both :file #{prefix}daily#{suffix}.svg
309
+ reset
310
+ set grid ytics linestyle 0
311
+ set grid xtics linestyle 0
312
+ set terminal svg size 1200,800 fname 'Arial'
313
+
314
+ set xdata time
315
+ set timefmt "%Y-%m-%d"
316
+ set format x "%a, %b %d"
317
+ set xtics rotate by 60 right
318
+
319
+ set title "Hits and Visitors"
320
+ set xlabel "Date"
321
+ set ylabel "Hits"
322
+ set ylabel2 "Visits"
323
+
324
+ set style fill transparent solid 0.2 noborder
325
+
326
+ plot data using 1:2 with linespoints lw 3 lc rgb "#0000AA" pointtype 5 title "Hits" axes x1y2, \\
327
+ data using 1:2 with filledcurves below x1 linecolor rgb "#0000AA" notitle axes x1y2, \\
328
+ data using 1:3 with linespoints lw 3 lc rgb "#AA0000" pointtype 7 title "Visitors", \\
329
+ data using 1:3 with filledcurves below x1 notitle linecolor rgb "#AA0000", \\
330
+ data using 1:($3+10):3 with labels notitle textcolor rgb "#AA0000", \\
331
+ data using 1:($2+100):2 with labels notitle textcolor rgb "#0000AA" axes x1y2
332
+ #+END_SRC
333
+ EOS
334
+
335
+ #
336
+ # distribution per hour
337
+ #
338
+
339
table = group_and_count log_filtered, lambda { |x| x[:date_time].hour }

# days_filtered is 0 when all the entries fall on the same day: count that
# as one day, so that the per-day averages do not divide by zero
days_for_average = [days_filtered, 1].max

# one row per hour: ["HH:00", { hits:, visitors:, tx:, and the per-day averages }]
table_processed = table.prepare_for_output(sort: :key).map { |hour, stats|
  ["%02d" % hour + ":00"] +
    [ stats.merge(hits_per_day: stats[:hits] / days_for_average,
                  visitors_per_day: stats[:visitors] / days_for_average,
                  tx_per_day: stats[:tx] / days_for_average) ] }
345
+
346
+ OrgTableEmitter.emit table_processed,
347
+ title: "* Time Distribution",
348
+ compact: true,
349
+ headers: ["Time", "Hits", "Visitors", "Size (Kb)", "Hits/Day", "Visit/Day", "Size (Kb)/Day"],
350
+ name: "time_distribution"
351
+
352
+ puts <<EOS
353
+ #+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time#{suffix}.svg
354
+ reset
355
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
356
+
357
+ set grid ytics linestyle 0
358
+
359
+ set title "Hits and Visitors"
360
+ set xlabel "Date"
361
+ set ylabel "Hits and Visits"
362
+
363
+ set style fill solid 0.25
364
+ set boxwidth 0.6
365
+
366
+ set style data histograms
367
+ set style histogram clustered gap 1
368
+
369
+ plot data using 2:xtic(1) lc rgb "#0000AA" title "Hits", \\
370
+ data using 3 lc rgb "#AA0000" title "Visitors" axes x1y2, \\
371
+ data using ($0 - 0.2):($2 + 10):2 with labels title "" textcolor rgb("#0000AA"), \\
372
+ data using ($0 + 0.2):($3 + 10):3 with labels title "" textcolor rgb("#AA0000") axes x1y2
373
+ #+END_SRC
374
+
375
+ EOS
376
+
377
+ puts <<EOS
378
+ #+BEGIN_SRC gnuplot :var data = time_distribution :results output :exports both :file #{prefix}time-traffic#{suffix}.svg
379
+ reset
380
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
381
+
382
+ set grid ytics linestyle 0
383
+
384
+ set title "Traffic"
385
+ set xlabel "Date"
386
+ set ylabel "Traffic"
387
+
388
+ set style fill solid 0.50
389
+ set boxwidth 0.6
390
+
391
+ set style data histograms
392
+ set style histogram clustered gap 1
393
+
394
+ plot data using 2:xtic(1) lc rgb "#00AA00" title "Traffic", \\
395
+ data using ($0):($2 + 10):2 with labels title "" textcolor rgb("#00AA00")
396
+ #+END_SRC
397
+
398
+ EOS
399
+
400
+ #
401
+ # most requested pages
402
+ #
403
+
404
+ log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] == ".html" }
405
+ table = group_and_count log_success, lambda { |x| x[:uri] }
406
+
407
+ OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
408
+ title: "* Most Requested Pages",
409
+ compact: true,
410
+ headers: ["Page", "Hits", "Visitors", "Size"],
411
+ name: "pages"
412
+
413
+ puts "Total of #{table.size} entries."
414
+
415
+ #
416
+ # most requested URIs
417
+ #
418
+
419
+ log_success = log_filtered.select { |x| (x[:status][0] == "2" or x[:status][0] == "3") and x[:type] != ".html" }
420
+ table = group_and_count log_success, lambda { |x| x[:uri] }
421
+
422
# the table gets its own org name: "pages" is already used by the
# "Most Requested Pages" table and org names must be unique to be referenced
OrgTableEmitter.emit table.prepare_for_output(sort: :hits, reverse: true, limit: limit),
                     title: "* Most Requested URIs",
                     compact: true,
                     headers: ["URI", "Hits", "Visitors", "Size"],
                     name: "uris"
427
+
428
+ puts "Total of #{table.size} entries."
429
+
430
+ #
431
+ # 404s (Pages)
432
+ #
433
+
434
+ table = log_filtered.select { |x| x[:status] == "404" and x[:type] == ".html" }.map { |x| x[:uri] }.uniq_with_count
435
+
436
+ OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
437
+ title: "* HTML 404s",
438
+ compact: true,
439
+ headers: ["Page", "Misses"],
440
+ name: "page_miss"
441
+
442
+ puts "Total of #{table.size} entries."
443
+
444
+ #
445
+ # 404s URIs
446
+ #
447
+
448
# misses on anything which is not an HTML page, counted per URI
table = log_filtered.select { |x| x[:status] == "404" and x[:type] != ".html" }.map { |x| x[:uri] }.uniq_with_count

# title, headers and org name differ from those of the HTML 404s table,
# so that the two tables can be told apart (and referenced) in the report
OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
                     title: "* Non-HTML 404s",
                     compact: true,
                     headers: ["URI", "Misses"],
                     name: "uri_miss"
455
+
456
+ puts "Total of #{table.size} entries."
457
+
458
+ #
459
+ # Attacks
460
+ #
461
# Extensions of the resources a site is expected to serve; requests for
# anything else which do not succeed are reported as possible attacks.
REASONABLE_RESPONSE_TYPES = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].freeze

# Returns true if +ext+ (a file extension including the leading dot, in any
# case) is one of REASONABLE_RESPONSE_TYPES. A nil or empty extension is
# not reasonable.
def reasonable_response_type ext
  REASONABLE_RESPONSE_TYPES.include? ext.to_s.downcase
end
464
+
465
+ table = log_filtered.select { |x| x[:status] != "200" and not reasonable_response_type(x[:type]) }.map { |x| x[:uri] }.uniq_with_count
466
+
467
+ OrgTableEmitter.emit table.prepare_for_output(reverse: true, sort: 0, limit: limit),
468
+ title: "* Possible Attacks",
469
+ compact: true,
470
+ headers: ["Request", "Count"],
471
+ name: "attacks"
472
+
473
+ puts "Total of #{table.size} entries."
474
+
475
+ #
476
+ # IPs
477
+ #
478
+
479
+ table = group_and_count log_success, lambda { |x| x[:ip] }
480
+
481
+ OrgTableEmitter.emit table.prepare_for_output(sort: :key, reverse: true, limit: limit),
482
+ title: "* IPs",
483
+ compact: true,
484
+ headers: ["IP", "Hits", "Visitors", "Size"],
485
+ name: "ips"
486
+
487
+ puts "Total of #{table.size} entries."
488
+
489
+ #
490
+ # Statuses, Browsers and Platforms
491
+ #
492
+
493
+ [:status, :browser, :platform].each do |what|
494
+
495
+ result = log_filtered.map { |x| x[what] }.uniq_with_count
496
+
497
+ OrgTableEmitter.emit result.prepare_for_output(sort: :key),
498
+ title: "* #{what.to_s.capitalize}",
499
+ compact: true,
500
+ headers: [what.to_s.capitalize, "Hits"],
501
+ name: what.to_s
502
+
503
+ puts <<EOS
504
+ #+BEGIN_SRC gnuplot :var data = #{what.to_s} :results output :exports both :file #{prefix}#{what.to_s}#{suffix}.svg
505
+ reset
506
+ set grid ytics linestyle 0
507
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
508
+
509
+ set style fill solid 0.25
510
+ set boxwidth 0.6
511
+
512
+ plot data using 2:xtic(1) with boxes lc rgb "#0000AA" title "Hits", \\
513
+ data using ($0):($2+100):2 with labels textcolor rgb "#0000AA"
514
+ #+END_SRC
515
+ EOS
516
+
517
+ end
518
+
519
+ #
520
+ # Statuses by day
521
+ #
522
+ result = group_and_generic_count log_filtered,
523
+ lambda { |x| x[:date_time].to_date },
524
+ lambda { |x| h = Hash.new;
525
+ h["4xx"] = x.select { |y| y[:status][0] == "4" }.count;
526
+ h["3xx"] = x.select { |y| y[:status][0] == "3" }.count;
527
+ h["2xx"] = x.select { |y| y[:status][0] == "2" }.count;
528
+ h }
529
+
530
+ OrgTableEmitter.emit result.prepare_for_output(sort: :key),
531
+ title: "* Daily Status",
532
+ compact: true,
533
+ headers: ["Day", "4xx", "3xx", "2xx"],
534
+ name: "daily_statuses"
535
+
536
+ puts <<EOS
537
+ #+BEGIN_SRC gnuplot :var data = daily_statuses :results output :exports both :file #{prefix}daily-statuses#{suffix}.svg
538
+ reset
539
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
540
+
541
+ set grid ytics linestyle 0
542
+
543
+ set title "Daily Statuses"
544
+ set xlabel "Date"
545
+ set ylabel "Number of Hits"
546
+ set xtics rotate by 60 right
547
+
548
+ set style fill solid 0.25
549
+ set boxwidth 0.6
550
+
551
+ set style data histograms
552
+ set style histogram clustered gap 1
553
+
554
+ plot data using 2:xtic(1) lc rgb "#CC0000" title "4xx", \\
555
+ data using 3 lc rgb "#0000CC" title "3xx", \\
556
+ data using 4 lc rgb "#00AA00" title "2xx", \\
557
+ data using ($0 - 1. / 4):($2 + 0.5):2 with labels title "" textcolor rgb("#CC0000"), \\
558
+ data using ($0):($3 + 0.5):3 with labels title "" textcolor rgb("#0000CC"), \\
559
+ data using ($0 + 1. / 4):($4 + 0.5):4 with labels title "" textcolor rgb("#00AA00")
560
+ #+END_SRC
561
+
562
+ EOS
563
+
564
+ #
565
+ # Referer
566
+ #
567
require 'uri' # Kernel#URI is used below

# group by the host of the referer; referers which cannot be parsed are
# grouped under the empty string
result = group_and_count log_filtered, lambda { |x|
  begin
    URI(x[:referer]).host
  rescue StandardError
    ""
  end
}

# drop the entries without a host. reject (rather than reject!) is used
# because reject! returns nil when there is nothing to remove
good_result = result.reject { |host, _stats| host.nil? }
573
+
574
+ OrgTableEmitter.emit good_result.prepare_for_output(sort: :key),
575
+ title: "* Referer",
576
+ compact: true,
577
+ headers: ["Referer", "Hits", "Visitors", "Size"],
578
+ name: "referers"
579
+
580
+ puts <<EOS
581
+ #+BEGIN_SRC gnuplot :var data = referers :results output :exports both :file #{prefix}referers#{suffix}.svg
582
+ reset
583
+ set terminal svg size 1200,800 fname 'Arial' fsize 10
584
+
585
+ set grid ytics linestyle 0
586
+ set grid xtics linestyle 0
587
+
588
+ set title "Referers"
589
+ set xlabel "Date"
590
+ set xtics rotate by 60 right
591
+ set ylabel "Hits and Visits"
592
+
593
+ set style fill solid 0.45
594
+ set boxwidth 0.7
595
+
596
+ set style data histograms
597
+ set style histogram clustered gap 1
598
+
599
+ plot data using 2:xtic(1) lc rgb "#AA00AA" title "Hits", \\
600
+ data using 3 lc rgb "#0AAAA0" title "Visits", \\
601
+ data using ($0 - 1. / 3):($2 + 50):2 with labels title "" textcolor rgb("#AA00AA"), \\
602
+ data using ($0 + 1. / 3):($3 + 50):3 with labels title "" textcolor rgb("#0AAAA0")
603
+ #+END_SRC
604
+ EOS
605
+
606
+ puts <<EOS
607
+ * Local Variables :noexport:
608
+ # Local Variables:
609
+ # org-confirm-babel-evaluate: nil
610
+ # org-display-inline-images: t
611
+ # end:
612
+ EOS
613
+
614
ended_at = Time.now
duration = ended_at - started_at # seconds, as a Float

# "%.3f" prints three decimals ("%.3d" would zero-pad the truncated integer)
puts <<EOS
** Performance

| Analysis started at | #{started_at.to_s} |
| Analysis ended at | #{ended_at.to_s} |
| Duration (sec) | #{"%.3f" % duration } |
| Duration (min) | #{"%.3f" % (duration / 60 )} |
| Log size | #{log.size} |
| Entries Parsed | #{log_input.size} |
| Lines/sec | #{log_input.size / duration} |
EOS
@@ -0,0 +1,4 @@
1
+ require 'apache_log_report/log_parser_sqlite3.rb'
2
+ require 'apache_log_report/option_parser.rb'
3
+ require 'apache_log_report/version.rb'
4
+
@@ -0,0 +1,49 @@
1
+ require 'date'
2
+ require 'browser'
3
+
4
# Parses an Apache log in combined format into an array of hashes.
class LogParserHash
  # We parse combined log, which looks like:
  #
  # 66.249.70.16 - - [18/Aug/2020:23:03:00 +0200] "GET /eatc/assets/images/team/gunde.png HTTP/1.1" 200 61586 "-" "Googlebot-Image/1.0"
  # 178.172.20.114 - - [25/Aug/2020:17:13:21 +0200] "GET /favicon.ico HTTP/1.1" 404 196 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
  #
  # Lines which do not match this expression are skipped.
  COMBINED_REGEXP = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<timestamp>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/

  # Class-level entry point with the same calling convention as
  # LogParser.parse; delegates to the instance method.
  def self.parse filename, options = {}
    new.parse filename, options
  end

  # Reads the log from +filename+ (or from ARGF when +filename+ is nil) and
  # returns an array with one hash per matching line. Besides the named
  # captures of COMBINED_REGEXP each hash contains :date_time (DateTime),
  # :size (Integer), :type (extension of the URI) and the :bot, :browser,
  # :browser_version, :platform and :platform_version values obtained from
  # Browser. +options+ is currently not used.
  #
  # Progress is reported on $stderr through ProgressBar, which has to be
  # loaded by the caller.
  def parse filename, options = {}
    progressbar = ProgressBar.create(output: $stderr)

    content = filename ? File.readlines(filename) : ARGF.readlines
    progressbar.total = content.size

    content.map { |line|
      match = COMBINED_REGEXP.match line

      progressbar.increment

      # nil entries (lines which do not match) are removed by compact below
      next unless match

      hash = match_to_hash(match)

      # 18/Aug/2020:23:03:00 => 18/Aug/2020 23:03:00
      hash[:date_time] = DateTime.parse(hash[:timestamp].sub(":", " "))
      hash[:size] = hash[:size].to_i # a size of "-" becomes 0
      hash[:type] = hash[:uri] ? File.extname(hash[:uri]) : ""

      ua = Browser.new(hash[:user_agent_string], accept_language: "en-us")
      hash[:bot] = ua.bot?
      hash[:browser] = ua.name || ""
      hash[:browser_version] = ua.version || ""
      hash[:platform] = ua.platform.name || ""
      hash[:platform_version] = ua.platform.version || ""

      hash
    }.compact
  end

  private

  # Turns the named captures of +match+ into a hash with symbol keys.
  # (A helper is used instead of a MatchData class nested in this class:
  # such a class is a new LogParserHash::MatchData and does not add any
  # method to the MatchData objects returned by Regexp#match.)
  def match_to_hash match
    match.names.map(&:intern).zip(match.captures).to_h
  end
end
@@ -0,0 +1,99 @@
1
+ #
2
+ # SQLITE3
3
+ #
4
require 'browser'
require 'sqlite3'
5
+
6
# Parses an Apache log in combined format and stores it in an in-memory
# SQLite3 database.
class LogParser
  # Reads the log from +filename+ (or from ARGF when +filename+ is nil) and
  # inserts one row per matching line into the table LogLine of a new
  # in-memory database. Lines which are not in combined log format are
  # skipped. +options+ is currently not used.
  #
  # Returns the SQLite3::Database.
  #
  # Progress is reported on $stderr through ProgressBar, which has to be
  # loaded by the caller. Nothing is written to standard output, which is
  # reserved for the report.
  #
  # NOTE(review): the executable uses the value returned by this method as
  # an array of hashes (log.first, log.select, ...), which a database is
  # not -- confirm which of the two interfaces is the intended one.
  def self.parse filename, options = {}
    progressbar = ProgressBar.create(output: $stderr)

    content = filename ? File.readlines(filename) : ARGF.readlines
    progressbar.total = content.size

    db = SQLite3::Database.new ":memory:"
    db.execute "CREATE TABLE IF NOT EXISTS LogLine(
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      date_time TEXT,
      ip TEXT,
      remote_log_name TEXT,
      method TEXT,
      uri TEXT,
      status TEXT,
      size INTEGER,
      referer TEXT,
      user_agent_string TEXT,
      bot INTEGER,
      browser TEXT,
      browser_version TEXT,
      platform TEXT,
      platform_version TEXT
    )"

    ins = db.prepare('insert into LogLine (
      date_time,
      ip,
      remote_log_name,
      method,
      uri,
      status,
      size,
      referer,
      user_agent_string,
      bot,
      browser,
      browser_version,
      platform,
      platform_version)
      values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

    combined_regexp = /^(?<ip>\S+) \S+ (?<remote_log_name>\S+) \[(?<datetime>[^\]]+)\] "(?<method>[A-Z]+) (?<uri>.+)? HTTP\/[0-9.]+" (?<status>[0-9]{3}) (?<size>[0-9]+|-) "(?<referer>[^"]*)" "(?<user_agent_string>[^"]+)"/

    begin
      content.each do |line|
        match = combined_regexp.match line

        progressbar.increment

        # Regexp#match returns nil (not an empty hash) on lines which do not match
        next unless match

        ua = Browser.new(match[:user_agent_string], accept_language: "en-us")

        ins.execute(
          match[:datetime].sub(":", " "), # 18/Aug/2020:23:03:00 => 18/Aug/2020 23:03:00
          match[:ip],
          match[:remote_log_name],
          match[:method],
          match[:uri],
          match[:status],
          match[:size].to_i, # a size of "-" becomes 0
          match[:referer],
          match[:user_agent_string],
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || "")
        )
      end
    ensure
      # release the prepared statement even if an insert fails
      ins.close
    end

    db
  end
end
99
+
@@ -0,0 +1,63 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+
4
# Adds to the standard OptionParser a class method which parses the command
# line of apache_log_report.
class OptionParser
  # Parses +options+ (typically ARGV) and returns a hash with the keys
  # :limit, :from_date, :to_date, :ignore_crawlers, :only_crawlers,
  # :distinguish_crawlers, :no_selfpoll, :prefix and :suffix.
  #
  # :limit defaults to the top-level constant LIMIT when it is defined and
  # to 30 otherwise; the boolean flags default to false; the remaining keys
  # are present only if given on the command line.
  #
  # The recognized switches are removed from +options+, which is left with
  # the positional arguments (the log file). Raises the standard
  # OptionParser errors on invalid input; -h prints the help and exits.
  def self.parse(options)
    # LIMIT is defined by the executable: do not depend on it when the
    # library is loaded from somewhere else
    default_limit = defined?(LIMIT) ? LIMIT : 30
    args = {}

    opt_parser = OptionParser.new do |opts|
      opts.banner = "Usage: apache_log_report [options] logfile"

      opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{default_limit})") do |n|
        args[:limit] = n
      end

      opts.on("-bDATE", "--from-date=DATE", DateTime, "Consider entries after or on DATE") do |n|
        args[:from_date] = n
      end

      opts.on("-eDATE", "--to-date=DATE", DateTime, "Consider entries before or on DATE") do |n|
        args[:to_date] = n
      end

      opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
        args[:ignore_crawlers] = true
      end

      opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do
        args[:only_crawlers] = true
      end

      opts.on("-t", "--distinguish-crawlers", "Print totals distinguishing crawlers from visitors") do
        args[:distinguish_crawlers] = true
      end

      opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
        args[:no_selfpoll] = true
      end

      opts.on("-u", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
        args[:prefix] = n
      end

      opts.on("-w", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
        args[:suffix] = n
      end

      opts.on("-h", "--help", "Prints this help") do
        puts opts
        exit
      end
    end

    opt_parser.parse!(options)

    args[:limit] ||= default_limit
    args[:ignore_crawlers] ||= false
    args[:only_crawlers] ||= false
    args[:distinguish_crawlers] ||= false
    args[:no_selfpoll] ||= false

    args
  end
end
@@ -0,0 +1,3 @@
1
+ module ApacheLogReport
2
+ VERSION = "0.9.0"
3
+ end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apache_log_report
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Adolfo Villafiorita
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: browser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: sqlite3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Generate a request report in OrgMode format from an Apache log file.
42
+ email:
43
+ - adolfo.villafiorita@ict4g.net
44
+ executables:
45
+ - apache_log_report
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - CHANGELOG.org
51
+ - Gemfile
52
+ - LICENSE.txt
53
+ - README.org
54
+ - Rakefile
55
+ - apache_log_report.gemspec
56
+ - bin/console
57
+ - bin/setup
58
+ - exe/apache_log_report
59
+ - lib/apache_log_report.rb
60
+ - lib/apache_log_report/log_parser_hash.rb
61
+ - lib/apache_log_report/log_parser_sqlite3.rb
62
+ - lib/apache_log_report/option_parser.rb
63
+ - lib/apache_log_report/version.rb
64
+ homepage: https://www.ict4g.net/gitea/adolfo/apache_log_report
65
+ licenses:
66
+ - MIT
67
+ metadata:
68
+ allowed_push_host: https://rubygems.org/
69
+ homepage_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report
70
+ source_code_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report
71
+ changelog_uri: https://www.ict4g.net/gitea/adolfo/apache_log_report/CHANGELOG.org
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 2.3.0
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubygems_version: 3.1.2
88
+ signing_key:
89
+ specification_version: 4
90
+ summary: Generate a request report in OrgMode format from an Apache log file.
91
+ test_files: []