apache_log_report 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be24b7fd62ff47195fe5e19cd2717252a37a687a8d92745add21acc952fc50a3
4
- data.tar.gz: 3f5daff6a7447b6acca3a7ef456b148c3de49ff2dee87b75d419763dc2bbe2c4
3
+ metadata.gz: f35ffbe99784dc77cbd61bb7b949237b9b380448b4829e442f0ad17513adc7b7
4
+ data.tar.gz: 1d5030f64425293895b4b1fc84199178971bd34bde0d9b8450e868ec1d427cc3
5
5
  SHA512:
6
- metadata.gz: d5271fd26059e942a730c649cd6ea00c35b459c3f5b130bd2d0ae707717b3554c90b7adff8364f384e56b5e8782ca147b1fd40f957290aed4a7bc443eb38387c
7
- data.tar.gz: 475dfe3e49e76434c620bb75558bc2b5e40c6441fe455fca58d9b70b366a549f76a9d1bbbf9aae4bf5c9e0daa013770cefa9047fa11b8206f7eff4425be54b50
6
+ metadata.gz: 3eed1f80dec99429d5a3f25a99fd6e7ceca28547df852d97422a7cdb11b5d045592eb1c7e15443aca6c38744a7b62f59422049783f86d9b379039374c1b4ef09
7
+ data.tar.gz: 7de8e31cc0024d34191b6b70f8716bc4d31e037764e0f776d75a806490fe1f8cb170c8c989c570fb84f0329c0f12298d5a3bc39b32caa3cdcd826775c3ceaaae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- apache_log_report (0.9.7)
4
+ apache_log_report (1.1.0)
5
5
  apache_log-parser
6
6
  browser
7
7
  sqlite3
@@ -11,12 +11,12 @@ GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
13
  apache_log-parser (3.1.2)
14
- browser (5.0.0)
14
+ browser (5.3.1)
15
15
  rake (12.3.3)
16
16
  sqlite3 (1.4.2)
17
- terminal-table (1.8.0)
18
- unicode-display_width (~> 1.1, >= 1.1.1)
19
- unicode-display_width (1.7.0)
17
+ terminal-table (3.0.2)
18
+ unicode-display_width (>= 1.1.1, < 3)
19
+ unicode-display_width (2.1.0)
20
20
 
21
21
  PLATFORMS
22
22
  ruby
@@ -26,4 +26,4 @@ DEPENDENCIES
26
26
  rake (~> 12.0)
27
27
 
28
28
  BUNDLED WITH
29
- 2.1.4
29
+ 2.2.29
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'apache_log_report.rb'
4
4
 
5
+
5
6
  #
6
7
  # Parse Command Line Arguments
7
8
  #
8
9
 
9
10
  @command = ARGV.join(" ")
10
- @options = ApacheLogReport.options_parse ARGV
11
+ @options = ApacheLogReport::OptionsParser.parse ARGV
11
12
  @log_file = ARGV[0]
12
13
 
13
14
  if @log_file and not File.exist? @log_file
@@ -20,12 +21,23 @@ end
20
21
  #
21
22
 
22
23
  @started_at = Time.now
23
- @db = ApacheLogReport.parse @log_file
24
- ApacheLogReport.analyze_data @db, @options
24
+
25
+ @db = ApacheLogReport::LogParser.parse @log_file
26
+ @data = ApacheLogReport::DataCruncher.crunch @db, @options
27
+
25
28
  @ended_at = Time.now
26
29
  @duration = @ended_at - @started_at
27
30
 
31
+ @data = @data.merge({
32
+ command: @command,
33
+ log_file: @log_file,
34
+ started_at: @started_at,
35
+ ended_at: @ended_at,
36
+ duration: @duration
37
+ })
38
+
28
39
  #
29
40
  # Emit Output
30
41
  #
31
- puts ApacheLogReport.emit @options, @command, @log_file, @started_at, @ended_at, @duration
42
+
43
+ puts ApacheLogReport::Emitter.emit @data, @options
@@ -0,0 +1,245 @@
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+ require 'apache_log_report/version'
9
+
10
+ def self.options_parse options
11
+ limit = 30
12
+ args = {}
13
+
14
+ opt_parser = OptionParser.new do |opts|
15
+ opts.banner = "Usage: apache_log_report [options] [logfile]"
16
+
17
+ opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
18
+ args[:limit] = n
19
+ end
20
+
21
+ opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
22
+ args[:from_date] = n
23
+ end
24
+
25
+ opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
26
+ args[:to_date] = n
27
+ end
28
+
29
+ opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
30
+ args[:ignore_crawlers] = true
31
+ end
32
+
33
+ opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
34
+ args[:no_selfpoll] = true
35
+ end
36
+
37
+ opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do
38
+ args[:only_crawlers] = true
39
+ end
40
+
41
+ opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
42
+ args[:prefix] = n
43
+ end
44
+
45
+ opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
46
+ args[:suffix] = n
47
+ end
48
+
49
+ opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
50
+ args[:code_export] = n
51
+ end
52
+
53
+ opts.on("-v", "--version", "Prints version information") do
54
+ puts "apache_log_report version #{ApacheLogReport::VERSION}"
55
+ puts "Copyright (C) 2020 Adolfo Villafiorita"
56
+ puts "Distributed under the terms of the MIT license"
57
+ puts ""
58
+ puts "Written by Adolfo Villafiorita"
59
+ exit
60
+ end
61
+
62
+ opts.on("-h", "--help", "Prints this help") do
63
+ puts opts
64
+ puts "This is version #{ApacheLogReport::VERSION}"
65
+ exit
66
+ end
67
+ end
68
+
69
+ opt_parser.parse!(options)
70
+
71
+ args[:limit] ||= limit
72
+ args[:ignore_crawlers] ||= false
73
+ args[:no_selfpoll] ||= false
74
+ args[:only_crawlers] ||= false
75
+ args[:prefix] ||= ""
76
+ args[:suffix] ||= ""
77
+ args[:code_export] ||= "both"
78
+
79
+ return args
80
+ end
81
+
82
+ #
83
+ # parse an Apache log file and return a SQLite3 DB
84
+ #
85
+ require 'apache_log/parser'
86
+ require 'sqlite3'
87
+ require 'browser'
88
+
89
+ def self.parse filename, options = {}
90
+ content = filename ? File.readlines(filename) : ARGF.readlines
91
+
92
+ db = SQLite3::Database.new ":memory:"
93
+ db.execute "CREATE TABLE IF NOT EXISTS LogLine(
94
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
95
+ datetime TEXT,
96
+ ip TEXT,
97
+ user TEXT,
98
+ unique_visitor TEXT,
99
+ method TEXT,
100
+ path TEXT,
101
+ extension TEXT,
102
+ status TEXT,
103
+ size INTEGER,
104
+ referer TEXT,
105
+ user_agent TEXT,
106
+ bot INTEGER,
107
+ browser TEXT,
108
+ browser_version TEXT,
109
+ platform TEXT,
110
+ platform_version TEXT)"
111
+
112
+ ins = db.prepare('insert into LogLine (
113
+ datetime,
114
+ ip,
115
+ user,
116
+ unique_visitor,
117
+ method,
118
+ path,
119
+ extension,
120
+ status,
121
+ size,
122
+ referer,
123
+ user_agent,
124
+ bot,
125
+ browser,
126
+ browser_version,
127
+ platform,
128
+ platform_version)
129
+ values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
130
+
131
+ parser = ApacheLog::Parser.new(options[:format] || 'combined')
132
+
133
+ content.each do |line|
134
+ begin
135
+ hash = parser.parse line
136
+
137
+ ua = Browser.new(hash[:user_agent], accept_language: "en-us")
138
+ ins.execute(
139
+ hash[:datetime].iso8601,
140
+ hash[:remote_host],
141
+ hash[:user],
142
+ hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
143
+ hash[:request][:method],
144
+ hash[:request][:path],
145
+ (hash[:request][:path] ? File.extname(hash[:request][:path]) : ""),
146
+ hash[:status],
147
+ hash[:size].to_i,
148
+ hash[:referer],
149
+ hash[:user_agent],
150
+ ua.bot? ? 1 : 0,
151
+ (ua.name || ""),
152
+ (ua.version || ""),
153
+ (ua.platform.name || ""),
154
+ (ua.platform.version || "")
155
+ )
156
+ rescue
157
+ STDERR.puts "Apache Log parser error: could not parse #{line}"
158
+ end
159
+ end
160
+
161
+ db
162
+ end
163
+
164
+ #
165
+ # take a sqlite3 database and analyze data
166
+ #
167
+ def self.analyze_data db, options = {}
168
+
169
+ @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
170
+ @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
171
+ @log_size = db.execute "SELECT count(datetime) from LogLine"
172
+ @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
173
+ @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
174
+
175
+ #
176
+ # generate the where clause corresponding to the command line options to filter data
177
+ #
178
+ @filter = [
179
+ (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
180
+ (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
181
+ (options[:only_crawlers] ? "bot == 1" : nil),
182
+ (options[:ignore_crawlers] ? "bot == 0" : nil),
183
+ (options[:no_selfpolls] ? "ip != '::1'" : nil),
184
+ "true"
185
+ ].compact.join " and "
186
+
187
+ # in alternative to sum(size)
188
+ human_readable_size = <<-EOS
189
+ CASE
190
+ WHEN sum(size) < 1024 THEN sum(size) || ' B'
191
+ WHEN sum(size) >= 1024 AND sum(size) < (1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / 1024),2) || ' KB'
192
+ WHEN sum(size) >= (1024 * 1024) AND sum(size) < (1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024)),2) || ' MB'
193
+ WHEN sum(size) >= (1024 * 1024 * 1024) AND sum(size) < (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024)),2) || ' GB'
194
+ WHEN sum(size) >= (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024 * 1024)),2) || ' TB'
195
+ END AS size
196
+ EOS
197
+
198
+ @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
199
+ @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
200
+ @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"
201
+ @total_days = (Date.parse(@last_day[0][0]) - Date.parse(@first_day[0][0])).to_i
202
+
203
+ @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"
204
+
205
+ @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"
206
+
207
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
208
+
209
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
210
+
211
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
212
+
213
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{options[:limit]}"
214
+
215
+ @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
216
+ "extension != '#{x}'"
217
+ }.join " and "
218
+
219
+ @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
220
+
221
+ @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"
222
+
223
+ @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
224
+ @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
225
+ @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"
226
+
227
+ @statuses_by_day = (@by_day_2xx + @by_day_3xx + @by_day_4xx).group_by { |x| x[0] }.to_a.map { |x|
228
+ [x[0], x[1].map { |y| y[1] }].flatten
229
+ }
230
+
231
+ @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"
232
+
233
+ @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"
234
+
235
+ @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{options[:limit]}"
236
+
237
+ @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{options[:limit]}"
238
+
239
+
240
+ end
241
+
242
+
243
+ end
244
+
245
+
@@ -0,0 +1,84 @@
1
+ module ApacheLogReport
2
+ module DataCruncher
3
+
4
+ #
5
+ # take a sqlite3 database and analyze data
6
+ #
7
+
8
+ def self.crunch db, options = {}
9
+ @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
10
+ @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
11
+ @log_size = db.execute "SELECT count(datetime) from LogLine"
12
+ @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
13
+ @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
14
+
15
+ #
16
+ # generate the where clause corresponding to the command line options to filter data
17
+ #
18
+ filter = [
19
+ (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
20
+ (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
21
+ (options[:only_crawlers] ? "bot == 1" : nil),
22
+ (options[:ignore_crawlers] ? "bot == 0" : nil),
23
+ (options[:no_selfpolls] ? "ip != '::1'" : nil),
24
+ "true"
25
+ ].compact.join " and "
26
+
27
+ mega = 1024 * 1024
28
+ giga = mega * 1024
29
+ tera = giga * 1024
30
+
31
+ # in alternative to sum(size)
32
+ human_readable_size = <<-EOS
33
+ CASE
34
+ WHEN sum(size) < 1024 THEN sum(size) || ' B'
35
+ WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
36
+ WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
37
+ WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
38
+ WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
39
+ END AS size
40
+ EOS
41
+
42
+ @total_hits = db.execute "SELECT count(datetime) from LogLine where #{filter}"
43
+ @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
44
+ @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"
45
+ @total_days = (Date.parse(@last_day[0][0]) - Date.parse(@first_day[0][0])).to_i
46
+
47
+ @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
48
+ @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
49
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
50
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
51
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
52
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
53
+
54
+ @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
55
+ "extension != '#{x}'"
56
+ }.join " and "
57
+
58
+ @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
59
+ @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
60
+
61
+ @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
62
+ @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
63
+ @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"
64
+
65
+ @statuses_by_day = (@by_day_2xx + @by_day_3xx + @by_day_4xx).group_by { |x| x[0] }.to_a.map { |x|
66
+ [x[0], x[1].map { |y| y[1] }].flatten
67
+ }
68
+
69
+ @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by browser order by count(browser) desc"
70
+ @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by platform order by count(platform) desc"
71
+ @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
72
+ @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{options[:limit]}"
73
+
74
+ data = {}
75
+ self.instance_variables.each do |variable|
76
+ var_as_symbol = variable.to_s[1..-1].to_sym
77
+ data[var_as_symbol] = eval(variable.to_s)
78
+ end
79
+ data
80
+ end
81
+ end
82
+
83
+ end
84
+
@@ -0,0 +1,50 @@
1
+ require 'terminal-table'
2
+ require 'erb'
3
+ require 'ostruct'
4
+ require 'byebug'
5
+
6
+ module ApacheLogReport
7
+ module Emitter
8
+
9
+ #
10
+ # Emit Data
11
+ #
12
+ def self.emit data = {}, options = {}
13
+ @prefix = options[:prefix]
14
+ @suffix = options[:suffix]
15
+ @export = options[:code_export]
16
+ @mode = options[:output_format] || "org"
17
+
18
+ # for the ERB binding
19
+ @data = data
20
+ @options = options
21
+
22
+ # determine the main template to read
23
+ @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")
24
+ erb_template = File.read @template
25
+
26
+ output = ERB.new(erb_template).result(binding)
27
+ puts output
28
+ end
29
+
30
+ private
31
+
32
+ def self.output_table name, headings, rows
33
+ name = "#+NAME: #{name}"
34
+ table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }
35
+
36
+ #(2..headings.size).each do |i|
37
+ # table.align_column(i, :right)
38
+ #end
39
+
40
+ name + "\n" + table.to_s
41
+ end
42
+
43
+ def self.render(template, vars)
44
+ @template = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
45
+ erb_template = File.read @template
46
+ ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
47
+ end
48
+
49
+ end
50
+ end
@@ -0,0 +1,87 @@
1
+ require 'apache_log/parser'
2
+ require 'sqlite3'
3
+ require 'browser'
4
+
5
+ module ApacheLogReport
6
+ module LogParser
7
+ #
8
+ # parse an Apache log file and return a SQLite3 DB
9
+ #
10
+
11
+ def self.parse filename, options = {}
12
+ content = filename ? File.readlines(filename) : ARGF.readlines
13
+
14
+ db = SQLite3::Database.new ":memory:"
15
+ db.execute "CREATE TABLE IF NOT EXISTS LogLine(
16
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
17
+ datetime TEXT,
18
+ ip TEXT,
19
+ user TEXT,
20
+ unique_visitor TEXT,
21
+ method TEXT,
22
+ path TEXT,
23
+ extension TEXT,
24
+ status TEXT,
25
+ size INTEGER,
26
+ referer TEXT,
27
+ user_agent TEXT,
28
+ bot INTEGER,
29
+ browser TEXT,
30
+ browser_version TEXT,
31
+ platform TEXT,
32
+ platform_version TEXT)"
33
+
34
+ ins = db.prepare('insert into LogLine (
35
+ datetime,
36
+ ip,
37
+ user,
38
+ unique_visitor,
39
+ method,
40
+ path,
41
+ extension,
42
+ status,
43
+ size,
44
+ referer,
45
+ user_agent,
46
+ bot,
47
+ browser,
48
+ browser_version,
49
+ platform,
50
+ platform_version)
51
+ values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
52
+
53
+ parser = ApacheLog::Parser.new(options[:format] || 'combined')
54
+
55
+ content.each do |line|
56
+ begin
57
+ hash = parser.parse line
58
+
59
+ ua = Browser.new(hash[:user_agent], accept_language: "en-us")
60
+ ins.execute(
61
+ hash[:datetime].iso8601,
62
+ hash[:remote_host],
63
+ hash[:user],
64
+ hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
65
+ hash[:request][:method],
66
+ hash[:request][:path],
67
+ (hash[:request][:path] ? File.extname(hash[:request][:path]) : ""),
68
+ hash[:status],
69
+ hash[:size].to_i,
70
+ hash[:referer],
71
+ hash[:user_agent],
72
+ ua.bot? ? 1 : 0,
73
+ (ua.name || ""),
74
+ (ua.version || ""),
75
+ (ua.platform.name || ""),
76
+ (ua.platform.version || "")
77
+ )
78
+ rescue
79
+ STDERR.puts "Apache Log parser error: could not parse #{line}"
80
+ end
81
+ end
82
+
83
+ db
84
+ end
85
+
86
+ end
87
+ end
@@ -0,0 +1,86 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+ require 'apache_log_report/version'
4
+
5
+ module ApacheLogReport
6
+ module OptionsParser
7
+ #
8
+ # parse command line options
9
+ #
10
+ def self.parse options
11
+ limit = 30
12
+ args = {}
13
+
14
+ opt_parser = OptionParser.new do |opts|
15
+ opts.banner = "Usage: apache_log_report [options] [logfile]"
16
+
17
+ opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
18
+ args[:limit] = n
19
+ end
20
+
21
+ opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
22
+ args[:from_date] = n
23
+ end
24
+
25
+ opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
26
+ args[:to_date] = n
27
+ end
28
+
29
+ opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
30
+ args[:ignore_crawlers] = true
31
+ end
32
+
33
+ opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
34
+ args[:no_selfpoll] = true
35
+ end
36
+
37
+ opts.on("-c", "--only-crawlers", "Perform analysis on crawlers only") do
38
+ args[:only_crawlers] = true
39
+ end
40
+
41
+ opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
42
+ args[:prefix] = n
43
+ end
44
+
45
+ opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
46
+ args[:suffix] = n
47
+ end
48
+
49
+ opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
50
+ args[:code_export] = n
51
+ end
52
+
53
+ opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org. Defaults to org mode") do |n|
54
+ args[:output_format] = n
55
+ end
56
+
57
+ opts.on("-v", "--version", "Prints version information") do
58
+ puts "apache_log_report version #{ApacheLogReport::VERSION}"
59
+ puts "Copyright (C) 2020 Adolfo Villafiorita"
60
+ puts "Distributed under the terms of the MIT license"
61
+ puts ""
62
+ puts "Written by Adolfo Villafiorita"
63
+ exit
64
+ end
65
+
66
+ opts.on("-h", "--help", "Prints this help") do
67
+ puts opts
68
+ puts "This is version #{ApacheLogReport::VERSION}"
69
+ exit
70
+ end
71
+ end
72
+
73
+ opt_parser.parse!(options)
74
+
75
+ args[:limit] ||= limit
76
+ args[:ignore_crawlers] ||= false
77
+ args[:no_selfpoll] ||= false
78
+ args[:only_crawlers] ||= false
79
+ args[:prefix] ||= ""
80
+ args[:suffix] ||= ""
81
+ args[:code_export] ||= "both"
82
+
83
+ return args
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,25 @@
1
+ <%
2
+ def slugify string
3
+ string.downcase.gsub(/ +/, '-')
4
+ end
5
+ %>
6
+
7
+ <table id="<%= slugify(title || "") %>" class="<%= slugify(title || "") %>">
8
+ <thead>
9
+ <tr>
10
+ <% header.each do |heading| %>
11
+ <th class="<%= slugify(heading) %>"><%= heading %></th>
12
+ <% end %>
13
+ </tr>
14
+ </thead>
15
+ <tbody>
16
+ <% rows.each do |row| %>
17
+ <tr>
18
+ <% row.each_with_index do |cell, i| %>
19
+ <td class="<%= slugify (header[i] || "") %>"><%= cell %></td>
20
+ <% end %>
21
+ </tr>
22
+ <% end %>
23
+ </tbody>
24
+ </table>
25
+