apache_log_report 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be24b7fd62ff47195fe5e19cd2717252a37a687a8d92745add21acc952fc50a3
4
- data.tar.gz: 3f5daff6a7447b6acca3a7ef456b148c3de49ff2dee87b75d419763dc2bbe2c4
3
+ metadata.gz: f35ffbe99784dc77cbd61bb7b949237b9b380448b4829e442f0ad17513adc7b7
4
+ data.tar.gz: 1d5030f64425293895b4b1fc84199178971bd34bde0d9b8450e868ec1d427cc3
5
5
  SHA512:
6
- metadata.gz: d5271fd26059e942a730c649cd6ea00c35b459c3f5b130bd2d0ae707717b3554c90b7adff8364f384e56b5e8782ca147b1fd40f957290aed4a7bc443eb38387c
7
- data.tar.gz: 475dfe3e49e76434c620bb75558bc2b5e40c6441fe455fca58d9b70b366a549f76a9d1bbbf9aae4bf5c9e0daa013770cefa9047fa11b8206f7eff4425be54b50
6
+ metadata.gz: 3eed1f80dec99429d5a3f25a99fd6e7ceca28547df852d97422a7cdb11b5d045592eb1c7e15443aca6c38744a7b62f59422049783f86d9b379039374c1b4ef09
7
+ data.tar.gz: 7de8e31cc0024d34191b6b70f8716bc4d31e037764e0f776d75a806490fe1f8cb170c8c989c570fb84f0329c0f12298d5a3bc39b32caa3cdcd826775c3ceaaae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- apache_log_report (0.9.7)
4
+ apache_log_report (1.1.0)
5
5
  apache_log-parser
6
6
  browser
7
7
  sqlite3
@@ -11,12 +11,12 @@ GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
13
  apache_log-parser (3.1.2)
14
- browser (5.0.0)
14
+ browser (5.3.1)
15
15
  rake (12.3.3)
16
16
  sqlite3 (1.4.2)
17
- terminal-table (1.8.0)
18
- unicode-display_width (~> 1.1, >= 1.1.1)
19
- unicode-display_width (1.7.0)
17
+ terminal-table (3.0.2)
18
+ unicode-display_width (>= 1.1.1, < 3)
19
+ unicode-display_width (2.1.0)
20
20
 
21
21
  PLATFORMS
22
22
  ruby
@@ -26,4 +26,4 @@ DEPENDENCIES
26
26
  rake (~> 12.0)
27
27
 
28
28
  BUNDLED WITH
29
- 2.1.4
29
+ 2.2.29
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'apache_log_report.rb'
4
4
 
5
+
5
6
  #
6
7
  # Parse Command Line Arguments
7
8
  #
8
9
 
9
10
  @command = ARGV.join(" ")
10
- @options = ApacheLogReport.options_parse ARGV
11
+ @options = ApacheLogReport::OptionsParser.parse ARGV
11
12
  @log_file = ARGV[0]
12
13
 
13
14
  if @log_file and not File.exist? @log_file
@@ -20,12 +21,23 @@ end
20
21
  #
21
22
 
22
23
  @started_at = Time.now
23
- @db = ApacheLogReport.parse @log_file
24
- ApacheLogReport.analyze_data @db, @options
24
+
25
+ @db = ApacheLogReport::LogParser.parse @log_file
26
+ @data = ApacheLogReport::DataCruncher.crunch @db, @options
27
+
25
28
  @ended_at = Time.now
26
29
  @duration = @ended_at - @started_at
27
30
 
31
+ @data = @data.merge({
32
+ command: @command,
33
+ log_file: @log_file,
34
+ started_at: @started_at,
35
+ ended_at: @ended_at,
36
+ duration: @duration
37
+ })
38
+
28
39
  #
29
40
  # Emit Output
30
41
  #
31
- puts ApacheLogReport.emit @options, @command, @log_file, @started_at, @ended_at, @duration
42
+
43
+ puts ApacheLogReport::Emitter.emit @data, @options
@@ -0,0 +1,245 @@
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+ require 'apache_log_report/version'
9
+
10
+ def self.options_parse options
11
+ limit = 30
12
+ args = {}
13
+
14
+ opt_parser = OptionParser.new do |opts|
15
+ opts.banner = "Usage: apache_log_report [options] [logfile]"
16
+
17
+ opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
18
+ args[:limit] = n
19
+ end
20
+
21
+ opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
22
+ args[:from_date] = n
23
+ end
24
+
25
+ opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
26
+ args[:to_date] = n
27
+ end
28
+
29
+ opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
30
+ args[:ignore_crawlers] = true
31
+ end
32
+
33
+ opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
34
+ args[:no_selfpoll] = true
35
+ end
36
+
37
+ opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do
38
+ args[:only_crawlers] = true
39
+ end
40
+
41
+ opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
42
+ args[:prefix] = n
43
+ end
44
+
45
+ opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
46
+ args[:suffix] = n
47
+ end
48
+
49
+ opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
50
+ args[:code_export] = n
51
+ end
52
+
53
+ opts.on("-v", "--version", "Prints version information") do
54
+ puts "apache_log_report version #{ApacheLogReport::VERSION}"
55
+ puts "Copyright (C) 2020 Adolfo Villafiorita"
56
+ puts "Distributed under the terms of the MIT license"
57
+ puts ""
58
+ puts "Written by Adolfo Villafiorita"
59
+ exit
60
+ end
61
+
62
+ opts.on("-h", "--help", "Prints this help") do
63
+ puts opts
64
+ puts "This is version #{ApacheLogReport::VERSION}"
65
+ exit
66
+ end
67
+ end
68
+
69
+ opt_parser.parse!(options)
70
+
71
+ args[:limit] ||= limit
72
+ args[:ignore_crawlers] ||= false
73
+ args[:no_selfpoll] ||= false
74
+ args[:only_crawlers] ||= false
75
+ args[:prefix] ||= ""
76
+ args[:suffix] ||= ""
77
+ args[:code_export] ||= "both"
78
+
79
+ return args
80
+ end
81
+
82
+ #
83
+ # parse an Apache log file and return a SQLite3 DB
84
+ #
85
+ require 'apache_log/parser'
86
+ require 'sqlite3'
87
+ require 'browser'
88
+
89
+ def self.parse filename, options = {}
90
+ content = filename ? File.readlines(filename) : ARGF.readlines
91
+
92
+ db = SQLite3::Database.new ":memory:"
93
+ db.execute "CREATE TABLE IF NOT EXISTS LogLine(
94
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
95
+ datetime TEXT,
96
+ ip TEXT,
97
+ user TEXT,
98
+ unique_visitor TEXT,
99
+ method TEXT,
100
+ path TEXT,
101
+ extension TEXT,
102
+ status TEXT,
103
+ size INTEGER,
104
+ referer TEXT,
105
+ user_agent TEXT,
106
+ bot INTEGER,
107
+ browser TEXT,
108
+ browser_version TEXT,
109
+ platform TEXT,
110
+ platform_version TEXT)"
111
+
112
+ ins = db.prepare('insert into LogLine (
113
+ datetime,
114
+ ip,
115
+ user,
116
+ unique_visitor,
117
+ method,
118
+ path,
119
+ extension,
120
+ status,
121
+ size,
122
+ referer,
123
+ user_agent,
124
+ bot,
125
+ browser,
126
+ browser_version,
127
+ platform,
128
+ platform_version)
129
+ values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
130
+
131
+ parser = ApacheLog::Parser.new(options[:format] || 'combined')
132
+
133
+ content.each do |line|
134
+ begin
135
+ hash = parser.parse line
136
+
137
+ ua = Browser.new(hash[:user_agent], accept_language: "en-us")
138
+ ins.execute(
139
+ hash[:datetime].iso8601,
140
+ hash[:remote_host],
141
+ hash[:user],
142
+ hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
143
+ hash[:request][:method],
144
+ hash[:request][:path],
145
+ (hash[:request][:path] ? File.extname(hash[:request][:path]) : ""),
146
+ hash[:status],
147
+ hash[:size].to_i,
148
+ hash[:referer],
149
+ hash[:user_agent],
150
+ ua.bot? ? 1 : 0,
151
+ (ua.name || ""),
152
+ (ua.version || ""),
153
+ (ua.platform.name || ""),
154
+ (ua.platform.version || "")
155
+ )
156
+ rescue
157
+ STDERR.puts "Apache Log parser error: could not parse #{line}"
158
+ end
159
+ end
160
+
161
+ db
162
+ end
163
+
164
+ #
165
+ # take a sqlite3 database and analyze data
166
+ #
167
+ def self.analyze_data db, options = {}
168
+
169
+ @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
170
+ @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
171
+ @log_size = db.execute "SELECT count(datetime) from LogLine"
172
+ @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
173
+ @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
174
+
175
+ #
176
+ # generate the where clause corresponding to the command line options to filter data
177
+ #
178
+ @filter = [
179
+ (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
180
+ (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
181
+ (options[:only_crawlers] ? "bot == 1" : nil),
182
+ (options[:ignore_crawlers] ? "bot == 0" : nil),
183
+ (options[:no_selfpolls] ? "ip != '::1'" : nil),
184
+ "true"
185
+ ].compact.join " and "
186
+
187
+ # in alternative to sum(size)
188
+ human_readable_size = <<-EOS
189
+ CASE
190
+ WHEN sum(size) < 1024 THEN sum(size) || ' B'
191
+ WHEN sum(size) >= 1024 AND sum(size) < (1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / 1024),2) || ' KB'
192
+ WHEN sum(size) >= (1024 * 1024) AND sum(size) < (1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024)),2) || ' MB'
193
+ WHEN sum(size) >= (1024 * 1024 * 1024) AND sum(size) < (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024)),2) || ' GB'
194
+ WHEN sum(size) >= (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024 * 1024)),2) || ' TB'
195
+ END AS size
196
+ EOS
197
+
198
+ @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
199
+ @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
200
+ @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"
201
+ @total_days = (Date.parse(@last_day[0][0]) - Date.parse(@first_day[0][0])).to_i
202
+
203
+ @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"
204
+
205
+ @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"
206
+
207
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
208
+
209
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
210
+
211
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
212
+
213
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
214
+
215
+ @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
216
+ "extension != '#{x}'"
217
+ }.join " and "
218
+
219
+ @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
220
+
221
+ @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"
222
+
223
+ @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
224
+ @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
225
+ @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"
226
+
227
+ @statuses_by_day = (@by_day_2xx + @by_day_3xx + @by_day_4xx).group_by { |x| x[0] }.to_a.map { |x|
228
+ [x[0], x[1].map { |y| y[1] }].flatten
229
+ }
230
+
231
+ @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"
232
+
233
+ @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"
234
+
235
+ @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{options[:limit]}"
236
+
237
+ @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{options[:limit]}"
238
+
239
+
240
+ end
241
+
242
+
243
+ end
244
+
245
+
@@ -0,0 +1,84 @@
1
module ApacheLogReport
  #
  # Runs the report queries against the LogLine table and collects the
  # results in a Hash.
  #
  module DataCruncher
    # A 404 on a path whose extension is not listed here is reported under
    # :attacks.
    REASONABLE_EXTENSIONS = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].freeze

    # Row limit applied when options carries no :limit.
    DEFAULT_LIMIT = 30

    #
    # take a sqlite3 database and analyze data
    #
    # db::      object answering #execute(sql) with an array of rows
    # options:: :from_date / :to_date (date-like objects or strings),
    #           :only_crawlers, :ignore_crawlers, :no_selfpoll (also accepted
    #           as :no_selfpolls) and :limit
    #
    # Returns a Hash with one symbol key per computed figure (:first_day,
    # :total_hits, :daily_distribution, :statuses_by_day, ...). Nothing is
    # kept on the module between calls.
    #
    def self.crunch db, options = {}
      filter = where_clause options
      size = human_readable_size
      limit = Integer(options[:limit] || DEFAULT_LIMIT)
      data = {}

      data[:first_day] = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      data[:last_day] = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
      data[:log_size] = db.execute "SELECT count(datetime) from LogLine"
      data[:crawlers_size] = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      data[:selfpolls_size] = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      data[:total_hits] = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      data[:total_unique_visitors] = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
      data[:total_size] = db.execute "SELECT #{size} from LogLine where #{filter}"

      # An empty table returns no rows for the first/last day queries.
      first_stamp = data[:first_day].dig(0, 0)
      last_stamp = data[:last_day].dig(0, 0)
      data[:total_days] = first_stamp && last_stamp ? (Date.parse(last_stamp) - Date.parse(first_stamp)).to_i : 0

      data[:daily_distribution] = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by date(datetime)"
      data[:time_distribution] = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by strftime('%H', datetime)"
      data[:most_requested_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:most_requested_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      unreasonable = REASONABLE_EXTENSIONS.map { |ext| "extension != '#{ext}'" }.join " and "
      data[:reasonable_requests_exts] = unreasonable

      data[:attacks] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{unreasonable}) group by path order by count(path) desc limit #{limit}"
      data[:statuses] = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      data[:by_day_4xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
      data[:by_day_3xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
      data[:by_day_2xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"

      data[:statuses_by_day] = statuses_by_day data[:by_day_2xx], data[:by_day_3xx], data[:by_day_4xx]

      data[:browsers] = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by browser order by count(browser) desc"
      data[:platforms] = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by platform order by count(platform) desc"
      data[:ips] = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"
      data[:referers] = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      data
    end

    #
    # generate the where clause corresponding to the command line options to filter data
    #
    def self.where_clause options
      [
        (options[:from_date] ? "date(datetime) >= '#{sql_date(options[:from_date])}'" : nil),
        (options[:to_date] ? "date(datetime) <= '#{sql_date(options[:to_date])}'" : nil),
        (options[:only_crawlers] ? "bot == 1" : nil),
        (options[:ignore_crawlers] ? "bot == 0" : nil),
        # the option parser stores the flag as :no_selfpoll
        ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "
    end

    # Format a date bound as YYYY-MM-DD, the format date(datetime) yields; a
    # full timestamp would sort after the bare date and drop the entries of
    # the begin day itself.
    def self.sql_date value
      day = value.respond_to?(:strftime) ? value : Date.parse(value.to_s)
      day.strftime("%Y-%m-%d")
    end

    # SQL expression rendering sum(size) with a B/KB/MB/GB/TB unit
    # (in alternative to sum(size)).
    def self.human_readable_size
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      <<-EOS
        CASE
        WHEN sum(size) < 1024 THEN sum(size) || ' B'
        WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
        WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
        WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size
      EOS
    end

    # Merge the per-class daily counts into rows of [day, 2xx, 3xx, 4xx]; a
    # class with no hits on a day counts as 0 so the columns never shift.
    def self.statuses_by_day by_day_2xx, by_day_3xx, by_day_4xx
      counts_by_day = Hash.new { |hash, day| hash[day] = [0, 0, 0] }
      [by_day_2xx, by_day_3xx, by_day_4xx].each_with_index do |rows, column|
        rows.each { |day, count| counts_by_day[day][column] = count }
      end
      counts_by_day.sort_by { |day, _| day.to_s }.map { |day, counts| [day, *counts] }
    end

    private_class_method :where_clause, :sql_date, :human_readable_size, :statuses_by_day
  end

end
84
+
@@ -0,0 +1,50 @@
1
require 'terminal-table'
require 'erb'
require 'ostruct'

module ApacheLogReport
  #
  # Renders report data through the ERB templates stored in the templates
  # directory next to this file.
  #
  module Emitter

    #
    # Emit Data
    #
    # data::    values made available to the template as @data
    # options:: made available to the template as @options; :prefix, :suffix
    #           and :code_export are also exposed as @prefix, @suffix and
    #           @export, and :output_format selects the main template
    #           (template.<format>.erb, "org" when not given)
    #
    # Returns the rendered report as a String. The caller prints it; printing
    # here as well made the command line tool emit a stray empty line.
    #
    def self.emit data = {}, options = {}
      @prefix = options[:prefix]
      @suffix = options[:suffix]
      @export = options[:code_export]
      @mode = options[:output_format] || "org"

      # for the ERB binding
      @data = data
      @options = options

      # determine the main template to read
      @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")
      erb_template = File.read @template

      ERB.new(erb_template).result(binding)
    end

    # NOTE: `private` does not apply to methods defined with `def self.`, so
    # the helpers below have always been callable from outside; they are left
    # that way.

    #
    # Build a text table preceded by a "#+NAME:" line.
    #
    # name::     table name written after "#+NAME: "
    # headings:: column headings
    # rows::     array of row arrays
    #
    # Returns the name line and the table as a single String.
    #
    def self.output_table name, headings, rows
      caption = "#+NAME: #{name}"
      table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }

      "#{caption}\n#{table}"
    end

    #
    # Render the partial templates/_<template>.html.erb.
    #
    # template:: partial name, without the leading underscore and extension
    # vars::     hash whose keys become readable by name inside the partial
    #
    # Returns the rendered String.
    #
    def self.render(template, vars)
      # Local on purpose: @template keeps pointing at the main template.
      partial = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
      erb_template = File.read partial
      ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
    end

  end
end
@@ -0,0 +1,87 @@
1
+ require 'apache_log/parser'
2
+ require 'sqlite3'
3
+ require 'browser'
4
+
5
module ApacheLogReport
  module LogParser
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # filename:: path of the log file; when nil the log is read through ARGF
    # options::  :format selects the ApacheLog::Parser format (defaults to
    #            'combined')
    #
    # Returns the in-memory SQLite3::Database holding one LogLine row per
    # parsed line. Lines that cannot be processed are reported on STDERR and
    # skipped.
    #
    def self.parse filename, options = {}
      # Stream the input instead of loading the whole log into memory.
      lines = filename ? File.foreach(filename) : ARGF.each_line

      db = SQLite3::Database.new ":memory:"
      db.execute "CREATE TABLE IF NOT EXISTS LogLine(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        datetime TEXT,
        ip TEXT,
        user TEXT,
        unique_visitor TEXT,
        method TEXT,
        path TEXT,
        extension TEXT,
        status TEXT,
        size INTEGER,
        referer TEXT,
        user_agent TEXT,
        bot INTEGER,
        browser TEXT,
        browser_version TEXT,
        platform TEXT,
        platform_version TEXT)"

      ins = db.prepare('insert into LogLine (
        datetime,
        ip,
        user,
        unique_visitor,
        method,
        path,
        extension,
        status,
        size,
        referer,
        user_agent,
        bot,
        browser,
        browser_version,
        platform,
        platform_version)
        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      begin
        lines.each do |line|
          begin
            entry = parser.parse line
            request = entry[:request]

            ua = Browser.new(entry[:user_agent], accept_language: "en-us")
            ins.execute(
              entry[:datetime].iso8601,
              entry[:remote_host],
              entry[:user],
              # day + host + user agent; interpolation tolerates a nil field
              "#{entry[:datetime].strftime("%Y-%m-%d")} #{entry[:remote_host]} #{entry[:user_agent]}",
              request[:method],
              request[:path],
              (request[:path] ? File.extname(request[:path]) : ""),
              entry[:status],
              entry[:size].to_i,
              entry[:referer],
              entry[:user_agent],
              ua.bot? ? 1 : 0,
              (ua.name || ""),
              (ua.version || ""),
              (ua.platform.name || ""),
              (ua.platform.version || "")
            )
          rescue
            STDERR.puts "Apache Log parser error: could not parse #{line}"
          end
        end
      ensure
        # Release the prepared statement; the database itself stays open.
        ins.close
      end

      db
    end

  end
end
@@ -0,0 +1,86 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+ require 'apache_log_report/version'
4
+
5
module ApacheLogReport
  module OptionsParser
    #
    # parse command line options
    #
    # options:: argument vector (typically ARGV). Recognised switches are
    #           removed from it in place by OptionParser#parse!, so whatever
    #           is left afterwards are the positional arguments.
    #
    # Returns a Hash with the keys :limit, :ignore_crawlers, :no_selfpoll,
    # :no_selfpolls, :only_crawlers, :prefix, :suffix and :code_export always
    # set, plus :from_date, :to_date and :output_format when given on the
    # command line.
    #
    # -v and -h print their text and terminate the process.
    #
    def self.parse options
      limit = 30
      args = {}

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: apache_log_report [options] [logfile]"

        opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
          args[:limit] = n
        end

        opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
          args[:from_date] = n
        end

        opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
          args[:to_date] = n
        end

        opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
          args[:ignore_crawlers] = true
        end

        opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
          # The data cruncher builds its filter from :no_selfpolls (plural);
          # set both spellings so the switch actually takes effect.
          args[:no_selfpoll] = true
          args[:no_selfpolls] = true
        end

        # Long form only: "-c" is also registered for --code-export below
        # and the later registration wins, so "-c" never reached this
        # handler.
        opts.on("--only-crawlers", "Perform analysis on crawlers only") do
          args[:only_crawlers] = true
        end

        opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:prefix] = n
        end

        opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:suffix] = n
        end

        opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
          args[:code_export] = n
        end

        opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org. Defaults to org mode") do |n|
          args[:output_format] = n
        end

        opts.on("-v", "--version", "Prints version information") do
          puts "apache_log_report version #{ApacheLogReport::VERSION}"
          puts "Copyright (C) 2020 Adolfo Villafiorita"
          puts "Distributed under the terms of the MIT license"
          puts ""
          puts "Written by Adolfo Villafiorita"
          exit
        end

        opts.on("-h", "--help", "Prints this help") do
          puts opts
          puts "This is version #{ApacheLogReport::VERSION}"
          exit
        end
      end

      opt_parser.parse!(options)

      # Fill in defaults for everything that was not given.
      args[:limit] ||= limit
      args[:ignore_crawlers] ||= false
      args[:no_selfpoll] ||= false
      args[:no_selfpolls] ||= false
      args[:only_crawlers] ||= false
      args[:prefix] ||= ""
      args[:suffix] ||= ""
      args[:code_export] ||= "both"

      args
    end
  end
end
@@ -0,0 +1,25 @@
1
<%# Renders `rows` as an HTML table with `header` as column headings and
    `title` (slugified) as the table id and class. Every value is
    HTML-escaped because the cells hold data taken from the access log. %>
<%
  # Lowercase the text and turn runs of spaces into a dash; to_s lets nil
  # and non-String headings through.
  def slugify string
    string.to_s.downcase.gsub(/ +/, '-')
  end
%>

<table id="<%= ERB::Util.html_escape(slugify(title || "")) %>" class="<%= ERB::Util.html_escape(slugify(title || "")) %>">
  <thead>
    <tr>
      <% header.each do |heading| %>
        <th class="<%= ERB::Util.html_escape(slugify(heading)) %>"><%= ERB::Util.html_escape(heading) %></th>
      <% end %>
    </tr>
  </thead>
  <tbody>
    <% rows.each do |row| %>
      <tr>
        <% row.each_with_index do |cell, i| %>
          <td class="<%= ERB::Util.html_escape(slugify(header[i] || "")) %>"><%= ERB::Util.html_escape(cell) %></td>
        <% end %>
      </tr>
    <% end %>
  </tbody>
</table>
25
+