apache_log_report 0.9.9 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0cfcc34ca8741250adff411009618f4a359c4c59606a911a924416650fa352e7
4
- data.tar.gz: 4f366f09084266e0112f597aac8be998ff47df8ef50d8fd01b1c26361dc889b9
3
+ metadata.gz: 07ad311b79b7ad4888250d690af4857cc620b07a4fdc7206bbc40adf5b2fd00b
4
+ data.tar.gz: 1ed81f7bd6324af49a24ad6d8abe9add71710813c051537f254f80d9c9ac6739
5
5
  SHA512:
6
- metadata.gz: ddee7b7b3fd56327c3409f0a6de2778b83a0e67d743d300f00259ba314159d1cb62315f388fc0136052895903c7e6df47e08ba59b036db23f0722fc99e5f7b6d
7
- data.tar.gz: 0e857dbb34cd66057a401016afcd457c19c995c7aff66a87d4db230569b67a091c19d5f3446377f1a7ee8f8cda3b70ea0b8b4753bd7aea3beadc07810cf2d54b
6
+ metadata.gz: db323468d20cb50f870ac0dbe62cbfd42feb10e69da02d9d892dd206bcf9e5d1c3c5ad91a542f4f888bc04a9a6df3514ba186d67cc60b32e7647df6ff183e5d5
7
+ data.tar.gz: cf33e4665e31a6f18fc346c80b6f8b7cae8456b0c9dbf2d8c658a383b3711e754f795d08963d5c3c5109bd34360c5bc00bb17c93b15e4c551a7a9a1b5f619023
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- apache_log_report (0.9.7)
4
+ apache_log_report (1.1.2)
5
5
  apache_log-parser
6
6
  browser
7
7
  sqlite3
@@ -11,12 +11,12 @@ GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
13
  apache_log-parser (3.1.2)
14
- browser (5.0.0)
14
+ browser (5.3.1)
15
15
  rake (12.3.3)
16
16
  sqlite3 (1.4.2)
17
- terminal-table (1.8.0)
18
- unicode-display_width (~> 1.1, >= 1.1.1)
19
- unicode-display_width (1.7.0)
17
+ terminal-table (3.0.2)
18
+ unicode-display_width (>= 1.1.1, < 3)
19
+ unicode-display_width (2.1.0)
20
20
 
21
21
  PLATFORMS
22
22
  ruby
@@ -26,4 +26,4 @@ DEPENDENCIES
26
26
  rake (~> 12.0)
27
27
 
28
28
  BUNDLED WITH
29
- 2.1.4
29
+ 2.2.29
data/README.org CHANGED
@@ -14,10 +14,8 @@ See the [[file:CHANGELOG.org][CHANGELOG]] file.
14
14
 
15
15
  * Todo
16
16
 
17
- ** TODO Version information from command line and in reports
18
- ** TODO Refactor code from one giant class to more manageable chunkes
19
- ** TODO Move performance stats var to class (to isolate vars)
20
- ** TODO Check total number of days (which is not working, now)
17
+ ** TODO Graphs in HTML output
18
+ ** TODO Countries
21
19
 
22
20
  * Compatibility
23
21
 
@@ -2,12 +2,13 @@
2
2
 
3
3
  require 'apache_log_report.rb'
4
4
 
5
+
5
6
  #
6
7
  # Parse Command Line Arguments
7
8
  #
8
9
 
9
10
  @command = ARGV.join(" ")
10
- @options = ApacheLogReport.options_parse ARGV
11
+ @options = ApacheLogReport::OptionsParser.parse ARGV
11
12
  @log_file = ARGV[0]
12
13
 
13
14
  if @log_file and not File.exist? @log_file
@@ -20,12 +21,30 @@ end
20
21
  #
21
22
 
22
23
@started_at = Time.now

# Parse the log into an in-memory SQLite database.
@db = ApacheLogReport::LogParser.parse @log_file

if @options[:output_format] == "sqlite"
  #
  # Dump the in-memory database to db.sqlite3 in the current directory
  #
  ddb = SQLite3::Database.new('db.sqlite3')
  begin
    b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
    b.step(-1) #=> DONE (a negative page count copies everything in one go)
    b.finish
  ensure
    # release the file handle even if the backup fails half way
    ddb.close
  end
else
  @data = ApacheLogReport::DataCruncher.crunch @db, @options

  @ended_at = Time.now
  @duration = @ended_at - @started_at

  # Add run metadata to the crunched data, so that templates can show it.
  @data = @data.merge({
    command: @command,
    log_file: @log_file,
    started_at: @started_at,
    ended_at: @ended_at,
    duration: @duration
  })

  #
  # Emit Output
  #
  # Emitter.emit writes the report to standard output itself and returns
  # nil: wrapping the call in +puts+ would append a stray empty line.
  ApacheLogReport::Emitter.emit @data, @options
end
@@ -0,0 +1,245 @@
1
+ module ApacheLogReport
2
+
3
+ #
4
+ # parse command line options
5
+ #
6
+ require 'optparse'
7
+ require 'optparse/date'
8
+ require 'apache_log_report/version'
9
+
10
#
# Parse the command line.
#
# +options+ is the argument vector (typically ARGV). Recognized switches
# are removed from it in place (OptionParser#parse!), so that what is left
# (the log file name, if any) can be read by the caller afterwards.
#
# Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
# :no_selfpolls, :only_crawlers, :prefix, :suffix and :code_export are
# always set; :from_date and :to_date are present only when given.
#
# -v and -h print their message and terminate the process.
#
def self.options_parse options
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: apache_log_report [options] [logfile]"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
      args[:from_date] = n
    end

    opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
      args[:to_date] = n
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    # Long form only: the short switch "-c" is taken by --code-export
    # below.  When both were registered with "-c", the later definition
    # silently won and "-c" never selected this option.
    opts.on("--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:prefix] = n
    end

    opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:suffix] = n
    end

    opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
      args[:code_export] = n
    end

    opts.on("-v", "--version", "Prints version information") do
      puts "apache_log_report version #{ApacheLogReport::VERSION}"
      puts "Copyright (C) 2020 Adolfo Villafiorita"
      puts "Distributed under the terms of the MIT license"
      puts ""
      puts "Written by Adolfo Villafiorita"
      exit
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      puts "This is version #{ApacheLogReport::VERSION}"
      exit
    end
  end

  opt_parser.parse!(options)

  args[:limit] ||= limit
  args[:ignore_crawlers] ||= false
  args[:no_selfpoll] ||= false
  args[:only_crawlers] ||= false
  args[:prefix] ||= ""
  args[:suffix] ||= ""
  args[:code_export] ||= "both"

  # The analysis code builds its filter from :no_selfpolls (plural):
  # publish the flag under both spellings, so that -p takes effect.
  args[:no_selfpolls] = args[:no_selfpoll]

  args
end
81
+
82
+ #
83
+ # parse an Apache log file and return a SQLite3 DB
84
+ #
85
+ require 'apache_log/parser'
86
+ require 'sqlite3'
87
+ require 'browser'
88
+
89
#
# Parse an Apache log and load it into an in-memory SQLite3 database.
#
# +filename+ is the log to read; when nil, the log is read from ARGF.
# +options+ may carry :format, the log format name handed to
# ApacheLog::Parser (defaults to 'combined').
#
# Returns the SQLite3::Database, with one row of table LogLine per line
# which could be parsed.  Lines raising an error are reported on STDERR
# and skipped.
#
def self.parse filename, options = {}
  content = filename ? File.readlines(filename) : ARGF.readlines

  db = SQLite3::Database.new ":memory:"
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    datetime TEXT,
    ip TEXT,
    user TEXT,
    unique_visitor TEXT,
    method TEXT,
    path TEXT,
    extension TEXT,
    status TEXT,
    size INTEGER,
    referer TEXT,
    user_agent TEXT,
    bot INTEGER,
    browser TEXT,
    browser_version TEXT,
    platform TEXT,
    platform_version TEXT)"

  ins = db.prepare('insert into LogLine (
    datetime,
    ip,
    user,
    unique_visitor,
    method,
    path,
    extension,
    status,
    size,
    referer,
    user_agent,
    bot,
    browser,
    browser_version,
    platform,
    platform_version)
    values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  begin
    content.each do |line|
      begin
        hash = parser.parse line

        # to_s: the field is missing (nil) in formats which do not log it
        ua = Browser.new(hash[:user_agent].to_s, accept_language: "en-us")
        path = hash[:request][:path]

        # Interpolation rather than "+": concatenating a nil field raised
        # and made the whole line be discarded as unparseable.
        unique_visitor = "#{hash[:datetime].strftime("%Y-%m-%d")} #{hash[:remote_host]} #{hash[:user_agent]}"

        ins.execute(
          hash[:datetime].iso8601,
          hash[:remote_host],
          hash[:user],
          unique_visitor,
          hash[:request][:method],
          path,
          (path ? File.extname(path) : ""),
          hash[:status],
          hash[:size].to_i,
          hash[:referer],
          hash[:user_agent],
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || "")
        )
      rescue StandardError
        STDERR.puts "Apache Log parser error: could not parse #{line}"
      end
    end
  ensure
    # the prepared statement is not needed once all lines are loaded
    ins.close
  end

  db
end
163
+
164
+ #
165
+ # take a sqlite3 database and analyze data
166
+ #
167
#
# Take a sqlite3 database and analyze data.
#
# +db+ must respond to +execute(sql)+ and contain the LogLine table.
# +options+ is the Hash built from the command line: :from_date,
# :to_date, :only_crawlers, :ignore_crawlers, :no_selfpolls (or
# :no_selfpoll) restrict the rows considered; :limit bounds the size of
# the "top N" queries (30 when missing).
#
# Results are stored in instance variables of the receiver (@total_hits,
# @daily_distribution, ...).  Returns @referers.
#
def self.analyze_data db, options = {}
  limit = Integer(options[:limit] || 30)

  # Dates have to be compared with date(datetime), which yields
  # YYYY-MM-DD.  Interpolating a DateTime as is produces a full ISO
  # timestamp, which sorts *after* the bare date and used to exclude the
  # entries of the begin date itself.
  sql_date = lambda do |value|
    date = value.respond_to?(:strftime) ? value : Date.parse(value.to_s)
    date.strftime("%Y-%m-%d")
  end

  # the command line parser stores the flag as :no_selfpoll
  no_selfpolls = options[:no_selfpolls] || options[:no_selfpoll]

  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{sql_date.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{sql_date.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    (no_selfpolls ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  mega = 1024 * 1024
  giga = mega * 1024
  tera = giga * 1024

  # in alternative to sum(size)
  human_readable_size = <<-EOS
    CASE
      WHEN sum(size) < 1024 THEN sum(size) || ' B'
      WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024),2) || ' KB'
      WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})),2) || ' MB'
      WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})),2) || ' GB'
      WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})),2) || ' TB'
    END AS size
  EOS

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"

  # an empty log has no first and no last day
  first = (@first_day.first || []).first
  last = (@last_day.first || []).first
  @total_days = (first && last) ? (Date.parse(last) - Date.parse(first)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  # a 404 on an extension which is not in this list is reported as an attack
  @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
    "extension != '#{x}'"
  }.join " and "

  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # one row per day: the day followed by the counts found for it.
  # NOTE(review): a day missing from one of the three classes yields a
  # shorter row, with the remaining counts shifted left -- confirm this
  # is what the report expects.
  @statuses_by_day = (@by_day_2xx + @by_day_3xx + @by_day_4xx).group_by { |x| x[0] }.map { |day, rows|
    [day, rows.map { |y| y[1] }].flatten
  }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
241
+
242
+
243
+ end
244
+
245
+
@@ -0,0 +1,86 @@
1
require 'date'

module ApacheLogReport
  module DataCruncher

    # extensions which are legitimately requested: a 404 on anything else
    # is reported among the attacks
    REASONABLE_EXTENSIONS = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].freeze

    #
    # take a sqlite3 database and analyze data
    #
    # +db+ must respond to +execute(sql)+ and contain the LogLine table.
    # +options+ is the Hash built from the command line: :from_date,
    # :to_date, :only_crawlers, :ignore_crawlers, :no_selfpolls (or
    # :no_selfpoll) restrict the rows considered; :limit bounds the size
    # of the "top N" queries (30 when missing).
    #
    # Returns a Hash, whose keys are symbols (:first_day, :total_hits,
    # :daily_distribution, ..., :streaks) and whose values are the rows
    # returned by the corresponding query (or the value derived from them).
    #
    def self.crunch db, options = {}
      limit = Integer(options[:limit] || 30)
      filter = filter_clause options
      size = human_readable_size

      # The result is collected in a local Hash (rather than read back
      # from the instance variables of the module), so that no state is
      # carried over from one invocation to the next.
      data = {}

      data[:first_day] = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      data[:last_day] = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
      data[:log_size] = db.execute "SELECT count(datetime) from LogLine"
      data[:crawlers_size] = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      data[:selfpolls_size] = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      data[:total_hits] = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      data[:total_unique_visitors] = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
      data[:total_size] = db.execute "SELECT #{size} from LogLine where #{filter}"
      data[:total_days] = days_between data[:first_day], data[:last_day]

      data[:daily_distribution] = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by date(datetime)"
      data[:time_distribution] = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by strftime('%H', datetime)"
      data[:most_requested_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:most_requested_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      data[:reasonable_requests_exts] = REASONABLE_EXTENSIONS.map { |x| "extension != '#{x}'" }.join " and "

      data[:attacks] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{data[:reasonable_requests_exts]}) group by path order by count(path) desc limit #{limit}"
      data[:statuses] = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      data[:by_day_4xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
      data[:by_day_3xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
      data[:by_day_2xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"

      # one row per day: the day followed by the counts found for it.
      # NOTE(review): a day missing from one of the three classes yields a
      # shorter row, with the remaining counts shifted left -- confirm
      # this is what the templates expect.
      data[:statuses_by_day] = (data[:by_day_2xx] + data[:by_day_3xx] + data[:by_day_4xx]).group_by { |x| x[0] }.map { |day, rows|
        [day, rows.map { |y| y[1] }].flatten
      }

      data[:browsers] = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by browser order by count(browser) desc"
      data[:platforms] = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by platform order by count(platform) desc"
      data[:ips] = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"
      data[:referers] = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      # not filtered: all the requests, ordered by ip and time
      data[:streaks] = db.execute "SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime"

      data
    end

    #
    # generate the where clause corresponding to the command line options to filter data
    #
    def self.filter_clause options
      # the command line parser stores the flag as :no_selfpoll
      no_selfpolls = options[:no_selfpolls] || options[:no_selfpoll]

      [
        (options[:from_date] ? "date(datetime) >= '#{sql_date(options[:from_date])}'" : nil),
        (options[:to_date] ? "date(datetime) <= '#{sql_date(options[:to_date])}'" : nil),
        (options[:only_crawlers] ? "bot == 1" : nil),
        (options[:ignore_crawlers] ? "bot == 0" : nil),
        (no_selfpolls ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "
    end

    #
    # format a date as YYYY-MM-DD, that is, like the output of date(datetime)
    #
    # Interpolating a DateTime as is produces a full ISO timestamp, which
    # sorts *after* the bare date: entries logged on the begin date were
    # excluded from the analysis.  Strings are parsed first, so that only
    # a well formed date ends up in the query.
    #
    def self.sql_date value
      date = value.respond_to?(:strftime) ? value : Date.parse(value.to_s)
      date.strftime("%Y-%m-%d")
    end

    #
    # SQL expression showing sum(size) with a unit (in alternative to sum(size))
    #
    def self.human_readable_size
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      <<-EOS
        CASE
          WHEN sum(size) < 1024 THEN sum(size) || ' B'
          WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
          WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
          WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
          WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size
      EOS
    end

    #
    # difference in days between the first and the last entry
    # (0 when the log is empty)
    #
    def self.days_between first_rows, last_rows
      first = (first_rows.first || []).first
      last = (last_rows.first || []).first
      return 0 unless first && last

      (Date.parse(last) - Date.parse(first)).to_i
    end

    private_class_method :filter_clause, :sql_date, :human_readable_size, :days_between
  end

end
86
+
@@ -0,0 +1,49 @@
1
+ require 'terminal-table'
2
+ require 'erb'
3
+ require 'ostruct'
4
+
5
module ApacheLogReport
  module Emitter

    #
    # Emit Data
    #
    # Renders templates/template.<format>.erb (found next to this file)
    # and writes the result to standard output.  The format is taken from
    # options[:output_format] and defaults to "org".
    #
    # The template is evaluated in the context of this module: it can read
    # @data, @options, @prefix, @suffix, @export, @mode and @template and
    # it can invoke output_table and render.
    #
    # Returns nil (the value of +puts+).
    #
    def self.emit data = {}, options = {}
      @prefix = options[:prefix]
      @suffix = options[:suffix]
      @export = options[:code_export]
      @mode = options[:output_format] || "org"

      # for the ERB binding
      @data = data
      @options = options

      # determine the main template to read
      @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")
      erb_template = File.read @template

      output = ERB.new(erb_template).result(binding)
      puts output
    end

    # The methods below are helpers for the templates.  They are singleton
    # methods of the module: a bare +private+ would not change their
    # visibility, so it has been dropped rather than suggesting a
    # protection which is not there.

    #
    # Return +rows+ formatted as a text table with +headings+, preceded
    # by a "#+NAME: name" line.
    #
    def self.output_table name, headings, rows
      name = "#+NAME: #{name}"
      table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }

      #(2..headings.size).each do |i|
      #  table.align_column(i, :right)
      #end

      name + "\n" + table.to_s
    end

    #
    # Render the partial templates/_<template>.html.erb and return the
    # resulting string.  Each key of +vars+ is made available to the
    # partial as a method (through an OpenStruct).
    #
    def self.render(template, vars)
      # a local variable: @template holds the path of the main template,
      # which is still being rendered when a partial is requested
      partial = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
      erb_template = File.read partial
      ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
    end

  end
end
@@ -0,0 +1,87 @@
1
+ require 'apache_log/parser'
2
+ require 'sqlite3'
3
+ require 'browser'
4
+
5
module ApacheLogReport
  module LogParser
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # +filename+ is the log to read; when nil, the log is read from ARGF.
    # +options+ may carry :format, the log format name handed to
    # ApacheLog::Parser (defaults to 'combined').
    #
    # The database lives in memory and has a single table, LogLine, with
    # one row per line which could be parsed.  Lines raising an error are
    # reported on STDERR and skipped.
    #

    def self.parse filename, options = {}
      content = filename ? File.readlines(filename) : ARGF.readlines

      db = SQLite3::Database.new ":memory:"
      db.execute "CREATE TABLE IF NOT EXISTS LogLine(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        datetime TEXT,
        ip TEXT,
        user TEXT,
        unique_visitor TEXT,
        method TEXT,
        path TEXT,
        extension TEXT,
        status TEXT,
        size INTEGER,
        referer TEXT,
        user_agent TEXT,
        bot INTEGER,
        browser TEXT,
        browser_version TEXT,
        platform TEXT,
        platform_version TEXT)"

      ins = db.prepare('insert into LogLine (
        datetime,
        ip,
        user,
        unique_visitor,
        method,
        path,
        extension,
        status,
        size,
        referer,
        user_agent,
        bot,
        browser,
        browser_version,
        platform,
        platform_version)
        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      begin
        content.each do |line|
          begin
            hash = parser.parse line

            # to_s: the field is missing (nil) in formats which do not log it
            ua = Browser.new(hash[:user_agent].to_s, accept_language: "en-us")
            path = hash[:request][:path]

            # A visitor is identified by day, address and user agent.
            # Interpolation rather than "+": concatenating a nil field
            # raised and made the whole line be discarded as unparseable.
            unique_visitor = "#{hash[:datetime].strftime("%Y-%m-%d")} #{hash[:remote_host]} #{hash[:user_agent]}"

            ins.execute(
              hash[:datetime].iso8601,
              hash[:remote_host],
              hash[:user],
              unique_visitor,
              hash[:request][:method],
              path,
              (path ? File.extname(path) : ""),
              hash[:status],
              hash[:size].to_i,
              hash[:referer],
              hash[:user_agent],
              ua.bot? ? 1 : 0,
              (ua.name || ""),
              (ua.version || ""),
              (ua.platform.name || ""),
              (ua.platform.version || "")
            )
          rescue StandardError
            STDERR.puts "Apache Log parser error: could not parse #{line}"
          end
        end
      ensure
        # the prepared statement is not needed once all lines are loaded
        ins.close
      end

      db
    end

  end
end
@@ -0,0 +1,86 @@
1
+ require 'optparse'
2
+ require 'optparse/date'
3
+ require 'apache_log_report/version'
4
+
5
module ApacheLogReport
  module OptionsParser
    #
    # parse command line options
    #
    # +options+ is the argument vector (typically ARGV). Recognized
    # switches are removed from it in place (OptionParser#parse!), so that
    # what is left (the log file name, if any) can be read by the caller
    # afterwards.
    #
    # Returns a Hash in which :limit, :ignore_crawlers, :no_selfpoll,
    # :no_selfpolls, :only_crawlers, :prefix, :suffix and :code_export are
    # always set; :from_date, :to_date and :output_format are present only
    # when given on the command line.
    #
    # -v and -h print their message and terminate the process.
    #
    def self.parse options
      limit = 30
      args = {}

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: apache_log_report [options] [logfile]"

        opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
          args[:limit] = n
        end

        opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
          args[:from_date] = n
        end

        opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
          args[:to_date] = n
        end

        opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
          args[:ignore_crawlers] = true
        end

        opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
          args[:no_selfpoll] = true
        end

        # Long form only: the short switch "-c" is taken by --code-export
        # below.  When both were registered with "-c", the later
        # definition silently won and "-c" never selected this option.
        opts.on("--only-crawlers", "Perform analysis on crawlers only") do
          args[:only_crawlers] = true
        end

        opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:prefix] = n
        end

        opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:suffix] = n
        end

        opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
          args[:code_export] = n
        end

        opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org, sqlite. Defaults to org mode") do |n|
          args[:output_format] = n
        end

        opts.on("-v", "--version", "Prints version information") do
          puts "apache_log_report version #{ApacheLogReport::VERSION}"
          puts "Copyright (C) 2020 Adolfo Villafiorita"
          puts "Distributed under the terms of the MIT license"
          puts ""
          puts "Written by Adolfo Villafiorita"
          exit
        end

        opts.on("-h", "--help", "Prints this help") do
          puts opts
          puts "This is version #{ApacheLogReport::VERSION}"
          exit
        end
      end

      opt_parser.parse!(options)

      args[:limit] ||= limit
      args[:ignore_crawlers] ||= false
      args[:no_selfpoll] ||= false
      args[:only_crawlers] ||= false
      args[:prefix] ||= ""
      args[:suffix] ||= ""
      args[:code_export] ||= "both"

      # DataCruncher builds its filter from :no_selfpolls (plural):
      # publish the flag under both spellings, so that -p takes effect.
      args[:no_selfpolls] = args[:no_selfpoll]

      args
    end
  end
end