apache_log_report 1.0.0 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be24b7fd62ff47195fe5e19cd2717252a37a687a8d92745add21acc952fc50a3
4
- data.tar.gz: 3f5daff6a7447b6acca3a7ef456b148c3de49ff2dee87b75d419763dc2bbe2c4
3
+ metadata.gz: 45396921f6e594c2b2dcce25320c292c08f4bb35ab7c19dc72fb99cec8520dd4
4
+ data.tar.gz: 990dacd5e607a0f62a55f7248396ac095f2867f905807f91c3df1f6c06b5def0
5
5
  SHA512:
6
- metadata.gz: d5271fd26059e942a730c649cd6ea00c35b459c3f5b130bd2d0ae707717b3554c90b7adff8364f384e56b5e8782ca147b1fd40f957290aed4a7bc443eb38387c
7
- data.tar.gz: 475dfe3e49e76434c620bb75558bc2b5e40c6441fe455fca58d9b70b366a549f76a9d1bbbf9aae4bf5c9e0daa013770cefa9047fa11b8206f7eff4425be54b50
6
+ metadata.gz: a1c69d138504939d0c1917c7d9f278fff974fce0c27f82c64fd45b80641070924ea5f63a17b6efb4c64c63ea54f458a8b7d3d3f3e100664c70cdc3488be78950
7
+ data.tar.gz: a675921834c7551b2b95bf983c08a71f40f94f1f16e2b24d6b4fb10c4be61f9ffd8abb9947f985f0419d1678045758934e448d7e77f4904d806f3c966b8af358
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- apache_log_report (0.9.7)
4
+ apache_log_report (1.1.2)
5
5
  apache_log-parser
6
6
  browser
7
7
  sqlite3
@@ -11,12 +11,12 @@ GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
13
  apache_log-parser (3.1.2)
14
- browser (5.0.0)
14
+ browser (5.3.1)
15
15
  rake (12.3.3)
16
16
  sqlite3 (1.4.2)
17
- terminal-table (1.8.0)
18
- unicode-display_width (~> 1.1, >= 1.1.1)
19
- unicode-display_width (1.7.0)
17
+ terminal-table (3.0.2)
18
+ unicode-display_width (>= 1.1.1, < 3)
19
+ unicode-display_width (2.1.0)
20
20
 
21
21
  PLATFORMS
22
22
  ruby
@@ -26,4 +26,4 @@ DEPENDENCIES
26
26
  rake (~> 12.0)
27
27
 
28
28
  BUNDLED WITH
29
- 2.1.4
29
+ 2.2.29
data/README.org CHANGED
@@ -14,10 +14,8 @@ See the [[file:CHANGELOG.org][CHANGELOG]] file.
14
14
 
15
15
  * Todo
16
16
 
17
- ** TODO Version information from command line and in reports
18
- ** TODO Refactor code from one giant class to more manageable chunkes
19
- ** TODO Move performance stats var to class (to isolate vars)
20
- ** TODO Check total number of days (which is not working, now)
17
+ ** TODO Graphs in HTML output
18
+ ** TODO Countries
21
19
 
22
20
  * Compatibility
23
21
 
@@ -6,8 +6,8 @@ Gem::Specification.new do |spec|
6
6
  spec.authors = ["Adolfo Villafiorita"]
7
7
  spec.email = ["adolfo.villafiorita@ict4g.net"]
8
8
 
9
- spec.summary = %q{Generate a request report in OrgMode format from an Apache log file.}
10
- spec.description = %q{Generate a request report in OrgMode format from an Apache log file.}
9
+ spec.summary = %q{Generate analytics from an Apache log file.}
10
+ spec.description = %q{Generate requests reports in HTML, OrgMode, and SQLite format from an Apache log file.}
11
11
  spec.homepage = "https://www.ict4g.net/gitea/adolfo/apache_log_report"
12
12
  spec.license = "MIT"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'apache_log_report.rb'
4
4
 
5
+
5
6
  #
6
7
  # Parse Command Line Arguments
7
8
  #
8
9
 
9
10
  @command = ARGV.join(" ")
10
- @options = ApacheLogReport.options_parse ARGV
11
+ @options = ApacheLogReport::OptionsParser.parse ARGV
11
12
  @log_file = ARGV[0]
12
13
 
13
14
  if @log_file and not File.exist? @log_file
@@ -20,12 +21,30 @@ end
20
21
  #
21
22
 
22
23
  @started_at = Time.now
23
- @db = ApacheLogReport.parse @log_file
24
- ApacheLogReport.analyze_data @db, @options
25
- @ended_at = Time.now
26
- @duration = @ended_at - @started_at
27
24
 
28
- #
29
- # Emit Output
30
- #
31
- puts ApacheLogReport.emit @options, @command, @log_file, @started_at, @ended_at, @duration
25
+ @db = ApacheLogReport::LogParser.parse @log_file
26
+
27
+ if @options[:output_format] == "sqlite"
28
+ ddb = SQLite3::Database.new('db.sqlite3')
29
+ b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
30
+ b.step(-1) #=> DONE
31
+ b.finish
32
+ else
33
+ @data = ApacheLogReport::DataCruncher.crunch @db, @options
34
+
35
+ @ended_at = Time.now
36
+ @duration = @ended_at - @started_at
37
+
38
+ @data = @data.merge({
39
+ command: @command,
40
+ log_file: @log_file,
41
+ started_at: @started_at,
42
+ ended_at: @ended_at,
43
+ duration: @duration
44
+ })
45
+
46
+ #
47
+ # Emit Output
48
+ #
49
+ puts ApacheLogReport::Emitter.emit @data, @options
50
+ end
@@ -0,0 +1,245 @@
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+ require 'apache_log_report/version'
9
+
10
#
# Parse the command line into an options hash.
#
# options:: the argument vector (typically ARGV). Recognised switches are
#           removed from it in place, so what is left is the log file name.
#
# Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
# :only_crawlers, :prefix, :suffix and :code_export are always set;
# :from_date and :to_date (DateTime) are present only when given.
#
# Raises OptionParser::ParseError for unknown switches or malformed values.
# -v and -h print to standard output and terminate the process.
#
def self.options_parse options
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: apache_log_report [options] [logfile]"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
      args[:from_date] = n
    end

    opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
      args[:to_date] = n
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    # "-c" is also declared by --code-export below, and OptionParser binds a
    # short switch to its last declaration: "-c" therefore always meant
    # --code-export.  The crawlers-only switch gets its own letter so that it
    # is reachable in short form without changing what "-c" does.
    opts.on("-C", "--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:prefix] = n
    end

    opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:suffix] = n
    end

    opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
      args[:code_export] = n
    end

    opts.on("-v", "--version", "Prints version information") do
      puts "apache_log_report version #{ApacheLogReport::VERSION}"
      puts "Copyright (C) 2020 Adolfo Villafiorita"
      puts "Distributed under the terms of the MIT license"
      puts ""
      puts "Written by Adolfo Villafiorita"
      exit
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      puts "This is version #{ApacheLogReport::VERSION}"
      exit
    end
  end

  opt_parser.parse!(options)

  # values given on the command line win over the defaults
  defaults = {
    limit: limit,
    ignore_crawlers: false,
    no_selfpoll: false,
    only_crawlers: false,
    prefix: "",
    suffix: "",
    code_export: "both"
  }
  defaults.merge(args)
end
81
+
82
+ #
83
+ # parse an Apache log file and return a SQLite3 DB
84
+ #
85
+ require 'apache_log/parser'
86
+ require 'sqlite3'
87
+ require 'browser'
88
+
89
#
# Load an Apache access log into an in-memory SQLite database.
#
# filename:: path of the log file; when nil the log is read from ARGF
# options::  :format is handed to ApacheLog::Parser ('combined' when absent)
#
# Returns the SQLite3::Database holding one LogLine row per accepted line.
# Lines that cannot be processed are reported on STDERR and skipped.
#
def self.parse filename, options = {}
  columns = %w[
    datetime ip user unique_visitor method path extension status size
    referer user_agent bot browser browser_version platform platform_version
  ]

  db = SQLite3::Database.new ":memory:"
  db.execute <<-SQL
    CREATE TABLE IF NOT EXISTS LogLine(
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      datetime TEXT,
      ip TEXT,
      user TEXT,
      unique_visitor TEXT,
      method TEXT,
      path TEXT,
      extension TEXT,
      status TEXT,
      size INTEGER,
      referer TEXT,
      user_agent TEXT,
      bot INTEGER,
      browser TEXT,
      browser_version TEXT,
      platform TEXT,
      platform_version TEXT)
  SQL

  placeholders = (["?"] * columns.size).join(", ")
  ins = db.prepare("insert into LogLine (#{columns.join(', ')}) values (#{placeholders})")

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  # iterate line by line instead of loading the whole log in memory
  lines = filename ? File.foreach(filename) : ARGF.each_line

  begin
    lines.each do |line|
      begin
        hash = parser.parse line

        # formats without a user agent field leave it nil: the row must still
        # be stored instead of being rejected by a nil string concatenation
        agent = hash[:user_agent]
        path = hash[:request][:path]
        ua = Browser.new(agent.to_s, accept_language: "en-us")

        ins.execute(
          hash[:datetime].iso8601,
          hash[:remote_host],
          hash[:user],
          # one visitor = one (day, address, user agent) triple
          [hash[:datetime].strftime("%Y-%m-%d"), hash[:remote_host], agent].join(" "),
          hash[:request][:method],
          path,
          (path ? File.extname(path) : ""),
          hash[:status],
          hash[:size].to_i,
          hash[:referer],
          agent,
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || "")
        )
      rescue StandardError
        STDERR.puts "Apache Log parser error: could not parse #{line}"
      end
    end
  ensure
    # release the prepared statement; the database itself stays open
    ins.close
  end

  db
end
163
+
164
+ #
165
+ # take a sqlite3 database and analyze data
166
+ #
167
#
# Run the report queries against the LogLine table and store every result
# in an instance variable of the receiver (@total_hits, @daily_distribution,
# @statuses_by_day, ...).
#
# db::      object responding to execute(sql) and returning an array of rows
# options:: :from_date / :to_date (anything responding to strftime, or a
#           string Date.parse understands), :only_crawlers, :ignore_crawlers,
#           :no_selfpoll (:no_selfpolls is accepted too) and :limit
#           (30 when absent)
#
# Returns the value of @referers.
# Raises ArgumentError when :limit is not an integer or a date is malformed.
#
def self.analyze_data db, options = {}
  # SQLite's date() yields YYYY-MM-DD: bounds must be rendered the same way,
  # otherwise "2021-03-01" >= "2021-03-01T00:00:00+00:00" is false and the
  # whole begin day is dropped
  sql_date = lambda do |value|
    day = value.respond_to?(:strftime) ? value : Date.parse(value.to_s)
    day.strftime "%Y-%m-%d"
  end

  # the limit ends up inside the SQL text: only an integer may get there
  limit = Integer(options[:limit] || 30)

  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{sql_date.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{sql_date.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    # the option parser stores this flag as :no_selfpoll
    (options[:no_selfpoll] || options[:no_selfpolls] ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  # in alternative to sum(size)
  human_readable_size = <<-EOS
    CASE
    WHEN sum(size) < 1024 THEN sum(size) || ' B'
    WHEN sum(size) >= 1024 AND sum(size) < (1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / 1024),2) || ' KB'
    WHEN sum(size) >= (1024 * 1024) AND sum(size) < (1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024)),2) || ' MB'
    WHEN sum(size) >= (1024 * 1024 * 1024) AND sum(size) < (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024)),2) || ' GB'
    WHEN sum(size) >= (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024 * 1024)),2) || ' TB'
    END AS size
  EOS

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"

  # an empty log has neither a first nor a last day
  first = @first_day.dig(0, 0)
  last = @last_day.dig(0, 0)
  @total_days = first && last ? (Date.parse(last) - Date.parse(first)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
    "extension != '#{x}'"
  }.join " and "

  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # one row per day, always [day, 2xx, 3xx, 4xx]: a class with no hits on a
  # day counts 0 instead of shifting the following columns to the left
  per_day = Hash.new { |counts, day| counts[day] = [0, 0, 0] }
  [@by_day_2xx, @by_day_3xx, @by_day_4xx].each_with_index do |rows, column|
    rows.each { |day, count| per_day[day][column] = count }
  end
  @statuses_by_day = per_day.sort_by { |day, _| day }.map { |day, counts| [day, *counts] }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
241
+
242
+
243
+ end
244
+
245
+
@@ -0,0 +1,86 @@
1
require 'date'

module ApacheLogReport
  #
  # Runs the report queries against the LogLine table and collects the
  # results in a plain Hash.
  #
  module DataCruncher
    # Rows shown in the "top N" tables when no :limit is given.
    DEFAULT_LIMIT = 30

    # A 404 on a path with any other extension is counted under :attacks.
    REASONABLE_EXTENSIONS = %w[
      .html .css .js .jpg .svg .png .woff .xml .ttf .ico .pdf .htm .txt .org
    ].freeze

    #
    # take a sqlite3 database and analyze data
    #
    # db::      object responding to execute(sql) and returning an array of rows
    # options:: :from_date / :to_date (anything responding to strftime, or a
    #           string Date.parse understands), :only_crawlers,
    #           :ignore_crawlers, :no_selfpoll (:no_selfpolls is accepted
    #           too) and :limit (DEFAULT_LIMIT when absent)
    #
    # Returns a Hash keyed by symbol (:first_day, :total_hits,
    # :daily_distribution, :statuses_by_day, :streaks, ...); apart from
    # :total_days, :reasonable_requests_exts and :statuses_by_day every value
    # is the row array returned by db.execute.
    #
    # Raises ArgumentError when :limit is not an integer or a date is malformed.
    #
    def self.crunch db, options = {}
      filter = build_filter options
      # the limit ends up inside the SQL text: only an integer may get there
      limit = Integer(options[:limit] || DEFAULT_LIMIT)
      size = human_readable_size
      visitors = "count(distinct(unique_visitor))"
      by_day = lambda do |digit|
        db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '#{digit}' and #{filter} group by date(datetime)"
      end

      # results live in a local hash: nothing is kept on the module between calls
      data = {}
      data[:first_day] = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      data[:last_day] = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
      data[:log_size] = db.execute "SELECT count(datetime) from LogLine"
      data[:crawlers_size] = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      data[:selfpolls_size] = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      data[:total_hits] = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      data[:total_unique_visitors] = db.execute "SELECT #{visitors} from LogLine where #{filter}"
      data[:total_size] = db.execute "SELECT #{size} from LogLine where #{filter}"
      data[:total_days] = days_between data[:first_day], data[:last_day]

      data[:daily_distribution] = db.execute "SELECT date(datetime), count(datetime), #{visitors}, #{size} from LogLine where #{filter} group by date(datetime)"
      data[:time_distribution] = db.execute "SELECT strftime('%H', datetime), count(datetime), #{visitors}, #{size} from LogLine where #{filter} group by strftime('%H', datetime)"
      data[:most_requested_pages] = db.execute "SELECT path, count(path), #{visitors}, #{size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:most_requested_resources] = db.execute "SELECT path, count(path), #{visitors}, #{size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_pages] = db.execute "SELECT path, count(path), #{visitors} from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_resources] = db.execute "SELECT path, count(path), #{visitors} from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      data[:reasonable_requests_exts] = REASONABLE_EXTENSIONS.map { |ext| "extension != '#{ext}'" }.join(" and ")

      data[:attacks] = db.execute "SELECT path, count(path), #{visitors} from LogLine where status == '404' and #{filter} and (#{data[:reasonable_requests_exts]}) group by path order by count(path) desc limit #{limit}"
      data[:statuses] = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      data[:by_day_4xx] = by_day.call "4"
      data[:by_day_3xx] = by_day.call "3"
      data[:by_day_2xx] = by_day.call "2"
      data[:statuses_by_day] = statuses_by_day data[:by_day_2xx], data[:by_day_3xx], data[:by_day_4xx]

      data[:browsers] = db.execute "SELECT browser, count(browser), #{visitors}, #{size} from LogLine where #{filter} group by browser order by count(browser) desc"
      data[:platforms] = db.execute "SELECT platform, count(platform), #{visitors}, #{size} from LogLine where #{filter} group by platform order by count(platform) desc"
      data[:ips] = db.execute "SELECT ip, count(ip), #{visitors}, #{size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"
      data[:referers] = db.execute "SELECT referer, count(referer), #{visitors}, #{size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      data[:streaks] = db.execute "SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime"

      data
    end

    # Where clause matching the command line options; always ends with "true"
    # so that it is valid even when no option is set.
    def self.build_filter options
      conditions = []
      conditions << "date(datetime) >= '#{sql_date options[:from_date]}'" if options[:from_date]
      conditions << "date(datetime) <= '#{sql_date options[:to_date]}'" if options[:to_date]
      conditions << "bot == 1" if options[:only_crawlers]
      conditions << "bot == 0" if options[:ignore_crawlers]
      # the option parser stores this flag as :no_selfpoll
      conditions << "ip != '::1'" if options[:no_selfpoll] || options[:no_selfpolls]
      conditions << "true"
      conditions.join " and "
    end

    # Render a date bound as YYYY-MM-DD, the format SQLite's date() yields;
    # a full timestamp would compare greater than its own day and drop it.
    def self.sql_date value
      day = value.respond_to?(:strftime) ? value : Date.parse(value.to_s)
      day.strftime "%Y-%m-%d"
    end

    # SQL expression rendering sum(size) with a B/KB/MB/GB/TB unit
    # (in alternative to sum(size)).
    def self.human_readable_size
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      <<-EOS
        CASE
        WHEN sum(size) < 1024 THEN sum(size) || ' B'
        WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
        WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
        WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size
      EOS
    end

    # Days between the first and the last log entry; 0 for an empty log.
    def self.days_between first_rows, last_rows
      first = first_rows.dig(0, 0)
      last = last_rows.dig(0, 0)
      return 0 unless first && last

      (Date.parse(last) - Date.parse(first)).to_i
    end

    # One row per day, always [day, 2xx, 3xx, 4xx]: a class with no hits on a
    # day counts 0 instead of shifting the following columns to the left.
    def self.statuses_by_day by_day_2xx, by_day_3xx, by_day_4xx
      per_day = Hash.new { |counts, day| counts[day] = [0, 0, 0] }
      [by_day_2xx, by_day_3xx, by_day_4xx].each_with_index do |rows, column|
        rows.each { |day, count| per_day[day][column] = count }
      end
      per_day.sort_by { |day, _| day }.map { |day, counts| [day, *counts] }
    end

    private_class_method :build_filter, :sql_date, :human_readable_size,
                         :days_between, :statuses_by_day
  end

end
86
+
@@ -0,0 +1,49 @@
1
+ require 'terminal-table'
2
+ require 'erb'
3
+ require 'ostruct'
4
+
5
+ module ApacheLogReport
6
+ module Emitter
7
+
8
+ # Render the report with the template of the requested output format and print it on standard output.
9
+ # The template is evaluated with this method's binding: it sees the instance variables set below and the method's local variables.
10
+ # Returns nil (the value of puts); File.read raises Errno::ENOENT when no template file exists for the format.
11
+ def self.emit data = {}, options = {}
12
+ @prefix = options[:prefix]
13
+ @suffix = options[:suffix]
14
+ @export = options[:code_export]
15
+ @mode = options[:output_format] || "org"
16
+
17
+ # keep the arguments in instance variables as well, for the ERB binding
18
+ @data = data
19
+ @options = options
20
+
21
+ # main template: templates/template.FORMAT.erb in the directory of this file, FORMAT being @mode
22
+ @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")
23
+ erb_template = File.read @template
24
+
25
+ output = ERB.new(erb_template).result(binding)
26
+ puts output
27
+ end
28
+
29
+ private # NOTE(review): has no effect on methods defined with "def self."; the two methods below stay public
30
+
31
+ def self.output_table name, headings, rows # returns a "#+NAME:" line followed by the table rendered by Terminal::Table
32
+ name = "#+NAME: #{name}"
33
+ table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }
34
+
35
+ #(2..headings.size).each do |i|
36
+ # table.align_column(i, :right)
37
+ #end
38
+
39
+ name + "\n" + table.to_s
40
+ end
41
+
42
+ def self.render(template, vars) # renders templates/_TEMPLATE.html.erb; each key of vars is a method of the OpenStruct the partial is bound to
43
+ @template = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
44
+ erb_template = File.read @template
45
+ ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
46
+ end
47
+
48
+ end
49
+ end
@@ -0,0 +1,87 @@
1
+ require 'apache_log/parser'
2
+ require 'sqlite3'
3
+ require 'browser'
4
+
5
module ApacheLogReport
  #
  # Turns an Apache access log into a SQLite database with one LogLine row
  # per request.
  #
  module LogParser
    # Columns filled by the insert statement, in bind order.
    COLUMNS = %w[
      datetime ip user unique_visitor method path extension status size
      referer user_agent bot browser browser_version platform platform_version
    ].freeze

    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # filename:: path of the log file; when nil the log is read from ARGF
    # options::  :format is handed to ApacheLog::Parser ('combined' when absent)
    #
    # Returns the in-memory SQLite3::Database. Lines that cannot be processed
    # are reported on STDERR and skipped.
    #
    def self.parse filename, options = {}
      db = SQLite3::Database.new ":memory:"
      db.execute <<-SQL
        CREATE TABLE IF NOT EXISTS LogLine(
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          datetime TEXT,
          ip TEXT,
          user TEXT,
          unique_visitor TEXT,
          method TEXT,
          path TEXT,
          extension TEXT,
          status TEXT,
          size INTEGER,
          referer TEXT,
          user_agent TEXT,
          bot INTEGER,
          browser TEXT,
          browser_version TEXT,
          platform TEXT,
          platform_version TEXT)
      SQL

      placeholders = (["?"] * COLUMNS.size).join(", ")
      ins = db.prepare("insert into LogLine (#{COLUMNS.join(', ')}) values (#{placeholders})")

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      # iterate line by line instead of loading the whole log in memory
      lines = filename ? File.foreach(filename) : ARGF.each_line

      begin
        lines.each do |line|
          begin
            ins.execute(*row_for(parser.parse(line)))
          rescue StandardError
            STDERR.puts "Apache Log parser error: could not parse #{line}"
          end
        end
      ensure
        # release the prepared statement; the database itself stays open
        ins.close
      end

      db
    end

    # Values for one LogLine row, in COLUMNS order, from a parsed log entry.
    def self.row_for hash
      # formats without a user agent field leave it nil: the row must still
      # be stored instead of being rejected by a nil string concatenation
      agent = hash[:user_agent]
      path = hash[:request][:path]
      ua = Browser.new(agent.to_s, accept_language: "en-us")

      [
        hash[:datetime].iso8601,
        hash[:remote_host],
        hash[:user],
        # one visitor = one (day, address, user agent) triple
        [hash[:datetime].strftime("%Y-%m-%d"), hash[:remote_host], agent].join(" "),
        hash[:request][:method],
        path,
        (path ? File.extname(path) : ""),
        hash[:status],
        hash[:size].to_i,
        hash[:referer],
        agent,
        ua.bot? ? 1 : 0,
        (ua.name || ""),
        (ua.version || ""),
        (ua.platform.name || ""),
        (ua.platform.version || "")
      ]
    end

    private_class_method :row_for

  end
end
@@ -0,0 +1,86 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+ require 'apache_log_report/version'
4
+
5
module ApacheLogReport
  #
  # Command line handling for the apache_log_report executable.
  #
  module OptionsParser
    #
    # parse command line options
    #
    # options:: the argument vector (typically ARGV). Recognised switches are
    #           removed from it in place, so what is left is the log file name.
    #
    # Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
    # :only_crawlers, :prefix, :suffix and :code_export are always set;
    # :from_date, :to_date (DateTime) and :output_format are present only
    # when given.
    #
    # Raises OptionParser::ParseError for unknown switches or malformed
    # values. -v and -h print to standard output and terminate the process.
    #
    def self.parse options
      limit = 30
      args = {}

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: apache_log_report [options] [logfile]"

        opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
          args[:limit] = n
        end

        opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
          args[:from_date] = n
        end

        opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
          args[:to_date] = n
        end

        opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
          args[:ignore_crawlers] = true
        end

        opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
          args[:no_selfpoll] = true
        end

        # "-c" is also declared by --code-export below, and OptionParser binds
        # a short switch to its last declaration: "-c" therefore always meant
        # --code-export.  The crawlers-only switch gets its own letter so that
        # it is reachable in short form without changing what "-c" does.
        opts.on("-C", "--only-crawlers", "Perform analysis on crawlers only") do
          args[:only_crawlers] = true
        end

        opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:prefix] = n
        end

        opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:suffix] = n
        end

        opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
          args[:code_export] = n
        end

        opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org, sqlite. Defaults to org mode") do |n|
          args[:output_format] = n
        end

        opts.on("-v", "--version", "Prints version information") do
          puts "apache_log_report version #{ApacheLogReport::VERSION}"
          puts "Copyright (C) 2020 Adolfo Villafiorita"
          puts "Distributed under the terms of the MIT license"
          puts ""
          puts "Written by Adolfo Villafiorita"
          exit
        end

        opts.on("-h", "--help", "Prints this help") do
          puts opts
          puts "This is version #{ApacheLogReport::VERSION}"
          exit
        end
      end

      opt_parser.parse!(options)

      # values given on the command line win over the defaults
      defaults = {
        limit: limit,
        ignore_crawlers: false,
        no_selfpoll: false,
        only_crawlers: false,
        prefix: "",
        suffix: "",
        code_export: "both"
      }
      defaults.merge(args)
    end
  end
end