apache_log_report 0.9.8 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -6
- data/exe/apache_log_report +16 -4
- data/lib/apache_log_report/apache_log_report.rb +245 -0
- data/lib/apache_log_report/data_cruncher.rb +84 -0
- data/lib/apache_log_report/emitter.rb +49 -0
- data/lib/apache_log_report/log_parser.rb +87 -0
- data/lib/apache_log_report/options_parser.rb +86 -0
- data/lib/apache_log_report/templates/_output_table.html.erb +25 -0
- data/lib/apache_log_report/templates/template.html.erb +164 -0
- data/lib/apache_log_report/templates/template.org.erb +262 -0
- data/lib/apache_log_report/version.rb +1 -1
- data/lib/apache_log_report.rb +5 -521
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a00134ccecff45a5213565360c91ff2b87c40af4b47bcdda18327ed556dfea86
|
4
|
+
data.tar.gz: 6244d502e63bf9e699f2ad1f7188f17669d645125e15a7aa591815fa091f51b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3775358b99f42896eae739abf20898588c55026a64d5bac188ac9007a73767fc59c2f608d5a45fb05765bea3f3a2cdb8ae938a07d278232334edb85ba1ce2b5b
|
7
|
+
data.tar.gz: 82c92ff76df83a14deb79443d8b19eb46b5b5e50e81cfb469c5eeac21e66d2b79ad91a51b1ecec6ac3aafa4dc33ebf2137d5dc1ce3ff988131f2c869b787ad35
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
apache_log_report (
|
4
|
+
apache_log_report (1.1.0)
|
5
5
|
apache_log-parser
|
6
6
|
browser
|
7
7
|
sqlite3
|
@@ -11,12 +11,12 @@ GEM
|
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
13
|
apache_log-parser (3.1.2)
|
14
|
-
browser (5.
|
14
|
+
browser (5.3.1)
|
15
15
|
rake (12.3.3)
|
16
16
|
sqlite3 (1.4.2)
|
17
|
-
terminal-table (
|
18
|
-
unicode-display_width (
|
19
|
-
unicode-display_width (1.
|
17
|
+
terminal-table (3.0.2)
|
18
|
+
unicode-display_width (>= 1.1.1, < 3)
|
19
|
+
unicode-display_width (2.1.0)
|
20
20
|
|
21
21
|
PLATFORMS
|
22
22
|
ruby
|
@@ -26,4 +26,4 @@ DEPENDENCIES
|
|
26
26
|
rake (~> 12.0)
|
27
27
|
|
28
28
|
BUNDLED WITH
|
29
|
-
2.
|
29
|
+
2.2.29
|
data/exe/apache_log_report
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
require 'apache_log_report.rb'
|
4
4
|
|
5
|
+
|
5
6
|
#
|
6
7
|
# Parse Command Line Arguments
|
7
8
|
#
|
8
9
|
|
9
10
|
@command = ARGV.join(" ")
|
10
|
-
@options = ApacheLogReport.
|
11
|
+
@options = ApacheLogReport::OptionsParser.parse ARGV
|
11
12
|
@log_file = ARGV[0]
|
12
13
|
|
13
14
|
if @log_file and not File.exist? @log_file
|
@@ -20,12 +21,23 @@ end
|
|
20
21
|
#
|
21
22
|
|
22
23
|
@started_at = Time.now
|
23
|
-
|
24
|
-
ApacheLogReport.
|
24
|
+
|
25
|
+
@db = ApacheLogReport::LogParser.parse @log_file
|
26
|
+
@data = ApacheLogReport::DataCruncher.crunch @db, @options
|
27
|
+
|
25
28
|
@ended_at = Time.now
|
26
29
|
@duration = @ended_at - @started_at
|
27
30
|
|
31
|
+
@data = @data.merge({
|
32
|
+
command: @command,
|
33
|
+
log_file: @log_file,
|
34
|
+
started_at: @started_at,
|
35
|
+
ended_at: @ended_at,
|
36
|
+
duration: @duration
|
37
|
+
})
|
38
|
+
|
28
39
|
#
|
29
40
|
# Emit Output
|
30
41
|
#
|
31
|
-
|
42
|
+
|
43
|
+
puts ApacheLogReport::Emitter.emit @data, @options
|
@@ -0,0 +1,245 @@
|
|
1
|
+
module ApacheLogReport
|
2
|
+
|
3
|
+
#
|
4
|
+
# parse command line options
|
5
|
+
#
|
6
|
+
require 'optparse'
|
7
|
+
require 'optparse/date'
|
8
|
+
require 'apache_log_report/version'
|
9
|
+
|
10
|
+
#
# Parse command line options.
#
# options:: the argument vector (typically ARGV). Recognized switches are
#           removed from it in place by OptionParser#parse!, so that only
#           the positional arguments are left in it.
#
# Returns a Hash which always contains :limit, :ignore_crawlers,
# :no_selfpoll, :only_crawlers, :prefix, :suffix and :code_export, plus
# :from_date and :to_date (DateTime) when the matching switch was given.
#
# -v and -h print their message and terminate the process.
# Invalid switches raise the OptionParser error unchanged.
#
def self.options_parse options
  limit = 30
  args = {}

  opt_parser = OptionParser.new do |opts|
    opts.banner = "Usage: apache_log_report [options] [logfile]"

    opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
      args[:limit] = n
    end

    opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
      args[:from_date] = n
    end

    opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
      args[:to_date] = n
    end

    opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
      args[:ignore_crawlers] = true
    end

    opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
      args[:no_selfpoll] = true
    end

    # No short form: "-c" belongs to --code-export below. Declaring it here
    # as well had no effect (the later declaration wins in OptionParser's
    # short-option table) and only made the help text misleading.
    opts.on("--only-crawlers", "Perform analysis on crawlers only") do
      args[:only_crawlers] = true
    end

    opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:prefix] = n
    end

    opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
      args[:suffix] = n
    end

    opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in code blocks (code, results, *both*, none)") do |n|
      args[:code_export] = n
    end

    opts.on("-v", "--version", "Prints version information") do
      puts "apache_log_report version #{ApacheLogReport::VERSION}"
      puts "Copyright (C) 2020 Adolfo Villafiorita"
      puts "Distributed under the terms of the MIT license"
      puts ""
      puts "Written by Adolfo Villafiorita"
      exit
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      puts "This is version #{ApacheLogReport::VERSION}"
      exit
    end
  end

  opt_parser.parse!(options)

  # Values given on the command line take precedence over the defaults.
  defaults = {
    limit: limit,
    ignore_crawlers: false,
    no_selfpoll: false,
    only_crawlers: false,
    prefix: "",
    suffix: "",
    code_export: "both"
  }
  defaults.merge(args)
end
|
81
|
+
|
82
|
+
#
|
83
|
+
# parse an Apache log file and return a SQLite3 DB
|
84
|
+
#
|
85
|
+
require 'apache_log/parser'
|
86
|
+
require 'sqlite3'
|
87
|
+
require 'browser'
|
88
|
+
|
89
|
+
#
# Parse an Apache log file and return a SQLite3 DB.
#
# filename:: path of the log file; when nil the log is read from ARGF
# options::  options[:format] is the log format name handed to
#            ApacheLog::Parser ('combined' when missing)
#
# Returns an in-memory SQLite3::Database with one LogLine row per line
# which could be parsed. Lines which cannot be parsed or stored are
# reported on STDERR and skipped.
#
def self.parse filename, options = {}
  # Enumerate the lines lazily instead of loading the whole log in memory.
  content = filename ? File.foreach(filename) : ARGF.each_line

  db = SQLite3::Database.new ":memory:"
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    datetime TEXT,
    ip TEXT,
    user TEXT,
    unique_visitor TEXT,
    method TEXT,
    path TEXT,
    extension TEXT,
    status TEXT,
    size INTEGER,
    referer TEXT,
    user_agent TEXT,
    bot INTEGER,
    browser TEXT,
    browser_version TEXT,
    platform TEXT,
    platform_version TEXT)"

  ins = db.prepare('insert into LogLine (
    datetime,
    ip,
    user,
    unique_visitor,
    method,
    path,
    extension,
    status,
    size,
    referer,
    user_agent,
    bot,
    browser,
    browser_version,
    platform,
    platform_version)
    values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

  parser = ApacheLog::Parser.new(options[:format] || 'combined')

  begin
    content.each do |line|
      begin
        hash = parser.parse line
        request = hash[:request]
        path = request[:path]

        ua = Browser.new(hash[:user_agent], accept_language: "en-us")
        ins.execute(
          hash[:datetime].iso8601,
          hash[:remote_host],
          hash[:user],
          # a visitor is identified by day, address and user agent
          hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
          request[:method],
          path,
          (path ? File.extname(path) : ""),
          hash[:status],
          hash[:size].to_i,
          hash[:referer],
          hash[:user_agent],
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || "")
        )
      rescue StandardError
        # best effort: a bad line must not abort the whole import
        STDERR.puts "Apache Log parser error: could not parse #{line}"
      end
    end
  ensure
    # release the prepared statement even when reading the log fails
    ins.close
  end

  db
end
|
163
|
+
|
164
|
+
#
|
165
|
+
# take a sqlite3 database and analyze data
|
166
|
+
#
|
167
|
+
#
# Take a sqlite3 database and analyze data.
#
# db::      object answering execute(sql) with an Array of rows; it must
#           contain the LogLine table
# options:: Hash of filters: :from_date, :to_date (anything answering
#           strftime, or a parseable date string), :only_crawlers,
#           :ignore_crawlers, :no_selfpoll (:no_selfpolls is accepted too)
#           and :limit (rows of the "top N" queries, 30 when missing)
#
# The results are stored in instance variables (@first_day, @total_hits,
# @statuses_by_day, ...); the value returned is the one of the last query
# (@referers).
#
def self.analyze_data db, options = {}
  # Integer() also guarantees that only a number reaches the SQL text
  limit = Integer(options[:limit] || 30)

  # date() yields YYYY-MM-DD: the bounds must have the same shape, or a
  # bound such as "2021-01-02T00:00:00+00:00" sorts after its own day
  sql_date = lambda do |value|
    (value.respond_to?(:strftime) ? value : Date.parse(value.to_s)).strftime("%Y-%m-%d")
  end

  @first_day = db.execute "SELECT datetime from LogLine order by datetime limit 1"
  @last_day = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
  @log_size = db.execute "SELECT count(datetime) from LogLine"
  @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

  #
  # generate the where clause corresponding to the command line options to filter data
  #
  @filter = [
    (options[:from_date] ? "date(datetime) >= '#{sql_date.call(options[:from_date])}'" : nil),
    (options[:to_date] ? "date(datetime) <= '#{sql_date.call(options[:to_date])}'" : nil),
    (options[:only_crawlers] ? "bot == 1" : nil),
    (options[:ignore_crawlers] ? "bot == 0" : nil),
    # the option parser stores the switch under :no_selfpoll
    ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
    "true"
  ].compact.join " and "

  # in alternative to sum(size)
  human_readable_size = <<-EOS
    CASE
    WHEN sum(size) < 1024 THEN sum(size) || ' B'
    WHEN sum(size) >= 1024 AND sum(size) < (1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / 1024),2) || ' KB'
    WHEN sum(size) >= (1024 * 1024) AND sum(size) < (1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024)),2) || ' MB'
    WHEN sum(size) >= (1024 * 1024 * 1024) AND sum(size) < (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024)),2) || ' GB'
    WHEN sum(size) >= (1024 * 1024 * 1024 * 1024) THEN ROUND((CAST(sum(size) AS REAL) / (1024 * 1024 * 1024 * 1024)),2) || ' TB'
    END AS size
  EOS

  @total_hits = db.execute "SELECT count(datetime) from LogLine where #{@filter}"
  @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{@filter}"
  @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{@filter}"

  # an empty log has neither a first nor a last day
  first_seen = @first_day.dig(0, 0)
  last_seen = @last_day.dig(0, 0)
  @total_days = first_seen && last_seen ? (Date.parse(last_seen) - Date.parse(first_seen)).to_i : 0

  @daily_distribution = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by date(datetime)"

  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by strftime('%H', datetime)"

  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} group by path order by count(path) desc limit #{limit}"

  @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
    "extension != '#{x}'"
  }.join " and "

  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{@filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"

  @statuses = db.execute "SELECT status, count(status) from LogLine where #{@filter} group by status order by status"

  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{@filter} group by date(datetime)"
  @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{@filter} group by date(datetime)"
  @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{@filter} group by date(datetime)"

  # One row per day: [day, 2xx, 3xx, 4xx]. A class with no hits on a day
  # counts as 0, so that every count stays in its own column.
  series = [@by_day_2xx, @by_day_3xx, @by_day_4xx]
  counts_by_day = series.map(&:to_h)
  days = series.flat_map { |rows| rows.map(&:first) }.uniq.sort_by(&:to_s)
  @statuses_by_day = days.map { |day| [day] + counts_by_day.map { |counts| counts.fetch(day, 0) } }

  @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by browser order by count(browser) desc"

  @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by platform order by count(platform) desc"

  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by ip order by count(ip) desc limit #{limit}"

  @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{@filter} group by referer order by count(referer) desc limit #{limit}"
end
|
241
|
+
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module ApacheLogReport
  # Date.parse is used to compute the number of days covered by the log
  require 'date'

  #
  # Runs the statistics queries on the LogLine table and collects the
  # results in a Hash.
  #
  module DataCruncher
    # rows returned by the "top N" queries when options[:limit] is missing
    DEFAULT_LIMIT = 30

    # a 404 on a path with none of these extensions is reported as an attack
    REASONABLE_EXTENSIONS = %w[.html .css .js .jpg .svg .png .woff .xml .ttf .ico .pdf .htm .txt .org].freeze

    #
    # take a sqlite3 database and analyze data
    #
    # db::      object answering execute(sql) with an Array of rows; it must
    #           contain the LogLine table
    # options:: Hash of filters: :from_date, :to_date (anything answering
    #           strftime, or a parseable date string), :only_crawlers,
    #           :ignore_crawlers, :no_selfpoll (:no_selfpolls is accepted
    #           too) and :limit (DEFAULT_LIMIT when missing)
    #
    # Returns a Hash with one entry per statistic: :first_day, :last_day,
    # :log_size, :crawlers_size, :selfpolls_size, :total_hits,
    # :total_unique_visitors, :total_size, :total_days, :daily_distribution,
    # :time_distribution, :most_requested_pages, :most_requested_resources,
    # :missed_pages, :missed_resources, :reasonable_requests_exts, :attacks,
    # :statuses, :by_day_4xx, :by_day_3xx, :by_day_2xx, :statuses_by_day,
    # :browsers, :platforms, :ips, :referers.
    #
    def self.crunch db, options = {}
      # Integer() also guarantees that only a number reaches the SQL text
      limit = Integer(options[:limit] || DEFAULT_LIMIT)
      filter = filter_for(options)
      human_readable_size = human_readable_size_sql
      data = {}

      data[:first_day] = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      data[:last_day] = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
      data[:log_size] = db.execute "SELECT count(datetime) from LogLine"
      data[:crawlers_size] = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      data[:selfpolls_size] = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      data[:total_hits] = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      data[:total_unique_visitors] = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
      data[:total_size] = db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"

      # an empty log has neither a first nor a last day
      first_seen = data[:first_day].dig(0, 0)
      last_seen = data[:last_day].dig(0, 0)
      data[:total_days] = first_seen && last_seen ? (Date.parse(last_seen) - Date.parse(first_seen)).to_i : 0

      data[:daily_distribution] = db.execute "SELECT date(datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
      data[:time_distribution] = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
      data[:most_requested_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:most_requested_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_pages] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      data[:missed_resources] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      data[:reasonable_requests_exts] = REASONABLE_EXTENSIONS.map { |x| "extension != '#{x}'" }.join " and "

      data[:attacks] = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{data[:reasonable_requests_exts]}) group by path order by count(path) desc limit #{limit}"
      data[:statuses] = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      data[:by_day_4xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
      data[:by_day_3xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
      data[:by_day_2xx] = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"

      data[:statuses_by_day] = statuses_by_day(data[:by_day_2xx], data[:by_day_3xx], data[:by_day_4xx])

      data[:browsers] = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by browser order by count(browser) desc"
      data[:platforms] = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by platform order by count(platform) desc"
      data[:ips] = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"
      data[:referers] = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      data
    end

    # Build the where clause corresponding to the command line options.
    def self.filter_for options
      [
        (options[:from_date] ? "date(datetime) >= '#{sql_date(options[:from_date])}'" : nil),
        (options[:to_date] ? "date(datetime) <= '#{sql_date(options[:to_date])}'" : nil),
        (options[:only_crawlers] ? "bot == 1" : nil),
        (options[:ignore_crawlers] ? "bot == 0" : nil),
        # the option parser stores the switch under :no_selfpoll
        ((options[:no_selfpoll] || options[:no_selfpolls]) ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "
    end

    # Format a bound as YYYY-MM-DD, the shape of the values returned by
    # date(): a full timestamp would sort after its own day and drop it.
    def self.sql_date value
      (value.respond_to?(:strftime) ? value : Date.parse(value.to_s)).strftime("%Y-%m-%d")
    end

    # SQL expression showing sum(size) with a unit (in alternative to sum(size)).
    def self.human_readable_size_sql
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      <<-EOS
        CASE
        WHEN sum(size) < 1024 THEN sum(size) || ' B'
        WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
        WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
        WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size
      EOS
    end

    # Merge per-day counts into rows [day, count, count, ...], one count per
    # series and in the order of the arguments. A series with no row for a
    # day contributes 0, so that every count stays in its own column.
    def self.statuses_by_day *series
      counts_by_day = series.map(&:to_h)
      days = series.flat_map { |rows| rows.map(&:first) }.uniq.sort_by(&:to_s)
      days.map { |day| [day] + counts_by_day.map { |counts| counts.fetch(day, 0) } }
    end

    private_class_method :filter_for, :sql_date, :human_readable_size_sql, :statuses_by_day
  end

end
|
84
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'terminal-table'
|
2
|
+
require 'erb'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module ApacheLogReport
  #
  # Renders the report from the ERB templates stored in the "templates"
  # directory next to this file.
  #
  module Emitter

    #
    # Emit Data
    #
    # data::    Hash of values made available to the template as @data
    # options:: Hash of options, available to the template as @options;
    #           :prefix, :suffix and :code_export are also exposed as
    #           @prefix, @suffix and @export; :output_format selects the
    #           template ("template.<format>.erb", "org" when missing)
    #
    # Returns the rendered report as a String. Nothing is printed here: the
    # executable prints the value returned, so printing as well (and
    # returning the nil of puts) added a stray empty line to the output.
    #
    # Raises a SystemCallError (e.g. Errno::ENOENT) when no template exists
    # for the requested format.
    #
    def self.emit data = {}, options = {}
      @prefix = options[:prefix]
      @suffix = options[:suffix]
      @export = options[:code_export]
      @mode = options[:output_format] || "org"

      # for the ERB binding
      @data = data
      @options = options

      # determine the main template to read
      @template = File.join(File.dirname(__FILE__), "templates", "template.#{@mode}.erb")
      erb_template = File.read @template

      ERB.new(erb_template).result(binding)
    end

    # NOTE: a bare "private" does not apply to methods defined with
    # "def self.", so the helpers below are public.
    # NOTE(review): presumably they are called from the templates; confirm
    # how before restricting their visibility.

    # Build a text table labelled with an Org Mode "#+NAME:" line.
    #
    # name::     label of the table
    # headings:: Array of column titles
    # rows::     Array of rows (Arrays of cells)
    #
    # Returns the label and the table as a single String.
    def self.output_table name, headings, rows
      name = "#+NAME: #{name}"
      table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }

      #(2..headings.size).each do |i|
      #  table.align_column(i, :right)
      #end

      name + "\n" + table.to_s
    end

    # Render the partial "_<template>.html.erb".
    #
    # template:: name of the partial, without leading "_" and extension
    # vars::     Hash of values the partial can read by name
    #
    # Returns the rendered String.
    def self.render(template, vars)
      @template = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
      erb_template = File.read @template
      # the OpenStruct turns every key of vars into a method of the binding
      ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
    end

  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'apache_log/parser'
|
2
|
+
require 'sqlite3'
|
3
|
+
require 'browser'
|
4
|
+
|
5
|
+
module ApacheLogReport
  #
  # Loads an Apache access log into an in-memory SQLite database.
  #
  module LogParser
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # filename:: path of the log file; when nil the log is read from ARGF
    # options::  options[:format] is the log format name handed to
    #            ApacheLog::Parser ('combined' when missing)
    #
    # Returns an in-memory SQLite3::Database with one LogLine row per line
    # which could be parsed. Lines which cannot be parsed or stored are
    # reported on STDERR and skipped.
    #
    def self.parse filename, options = {}
      # Enumerate the lines lazily instead of loading the whole log in memory.
      content = filename ? File.foreach(filename) : ARGF.each_line

      db = SQLite3::Database.new ":memory:"
      db.execute "CREATE TABLE IF NOT EXISTS LogLine(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        datetime TEXT,
        ip TEXT,
        user TEXT,
        unique_visitor TEXT,
        method TEXT,
        path TEXT,
        extension TEXT,
        status TEXT,
        size INTEGER,
        referer TEXT,
        user_agent TEXT,
        bot INTEGER,
        browser TEXT,
        browser_version TEXT,
        platform TEXT,
        platform_version TEXT)"

      ins = db.prepare('insert into LogLine (
        datetime,
        ip,
        user,
        unique_visitor,
        method,
        path,
        extension,
        status,
        size,
        referer,
        user_agent,
        bot,
        browser,
        browser_version,
        platform,
        platform_version)
        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      begin
        content.each do |line|
          begin
            hash = parser.parse line
            request = hash[:request]
            path = request[:path]

            ua = Browser.new(hash[:user_agent], accept_language: "en-us")
            ins.execute(
              hash[:datetime].iso8601,
              hash[:remote_host],
              hash[:user],
              # a visitor is identified by day, address and user agent
              hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
              request[:method],
              path,
              (path ? File.extname(path) : ""),
              hash[:status],
              hash[:size].to_i,
              hash[:referer],
              hash[:user_agent],
              ua.bot? ? 1 : 0,
              (ua.name || ""),
              (ua.version || ""),
              (ua.platform.name || ""),
              (ua.platform.version || "")
            )
          rescue StandardError
            # best effort: a bad line must not abort the whole import
            STDERR.puts "Apache Log parser error: could not parse #{line}"
          end
        end
      ensure
        # release the prepared statement even when reading the log fails
        ins.close
      end

      db
    end

  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'optparse/date'
|
3
|
+
require 'apache_log_report/version'
|
4
|
+
|
5
|
+
module ApacheLogReport
  #
  # Command line handling of the apache_log_report executable.
  #
  module OptionsParser
    #
    # parse command line options
    #
    # options:: the argument vector (typically ARGV). Recognized switches
    #           are removed from it in place by OptionParser#parse!, so that
    #           only the positional arguments are left in it.
    #
    # Returns a Hash which always contains :limit, :ignore_crawlers,
    # :no_selfpoll, :only_crawlers, :prefix, :suffix and :code_export, plus
    # :from_date, :to_date (DateTime) and :output_format when the matching
    # switch was given.
    #
    # -v and -h print their message and terminate the process.
    # Invalid switches raise the OptionParser error unchanged.
    #
    def self.parse options
      limit = 30
      args = {}

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: apache_log_report [options] [logfile]"

        opts.on("-lN", "--limit=N", Integer, "Number of entries to show (defaults to #{limit})") do |n|
          args[:limit] = n
        end

        opts.on("-bDATE", "--begin=DATE", DateTime, "Consider entries after or on DATE") do |n|
          args[:from_date] = n
        end

        opts.on("-eDATE", "--end=DATE", DateTime, "Consider entries before or on DATE") do |n|
          args[:to_date] = n
        end

        opts.on("-i", "--ignore-crawlers", "Ignore crawlers") do
          args[:ignore_crawlers] = true
        end

        opts.on("-p", "--ignore-selfpoll", "Ignore apaches self poll entries (from ::1)") do
          args[:no_selfpoll] = true
        end

        # No short form: "-c" belongs to --code-export below. Declaring it
        # here as well had no effect (the later declaration wins in
        # OptionParser's short-option table) and only made the help text
        # misleading.
        opts.on("--only-crawlers", "Perform analysis on crawlers only") do
          args[:only_crawlers] = true
        end

        opts.on("-uPREFIX", "--prefix=PREFIX", String, "Prefix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:prefix] = n
        end

        opts.on("-wSUFFIX", "--suffix=SUFFIX", String, "Suffix to add to all plots (used to run multiple analyses in the same dir)") do |n|
          args[:suffix] = n
        end

        opts.on("-cWHAT", "--code-export=WHAT", String, "Control :export directive in Org Mode code blocks (code, results, *both*, none)") do |n|
          args[:code_export] = n
        end

        opts.on("-fFORMAT", "--format=FORMAT", String, "Output format: html, org. Defaults to org mode") do |n|
          args[:output_format] = n
        end

        opts.on("-v", "--version", "Prints version information") do
          puts "apache_log_report version #{ApacheLogReport::VERSION}"
          puts "Copyright (C) 2020 Adolfo Villafiorita"
          puts "Distributed under the terms of the MIT license"
          puts ""
          puts "Written by Adolfo Villafiorita"
          exit
        end

        opts.on("-h", "--help", "Prints this help") do
          puts opts
          puts "This is version #{ApacheLogReport::VERSION}"
          exit
        end
      end

      opt_parser.parse!(options)

      # Values given on the command line take precedence over the defaults.
      # There is no default for :output_format in this Hash.
      defaults = {
        limit: limit,
        ignore_crawlers: false,
        no_selfpoll: false,
        only_crawlers: false,
        prefix: "",
        suffix: "",
        code_export: "both"
      }
      defaults.merge(args)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
<%
  # Partial rendering one HTML table.
  #
  # Values read from the binding:
  #   title  - used, slugified, as id and class of the table (may be nil)
  #   header - Array of column headings
  #   rows   - Array of rows, each one an Array of cells
  #
  # Cells, headings and attribute values are HTML-escaped.
  # NOTE(review): the cells presumably carry text taken from the log
  # (paths, referers, user agents), which would be arbitrary client input;
  # confirm against the templates calling this partial.

  # Turn a label into an identifier: lowercase, runs of spaces become "-".
  def slugify string
    string.downcase.gsub(/ +/, '-')
  end
%>

<table id="<%= ERB::Util.html_escape(slugify(title || "")) %>" class="<%= ERB::Util.html_escape(slugify(title || "")) %>">
  <thead>
    <tr>
      <% header.each do |heading| %>
        <th class="<%= ERB::Util.html_escape(slugify(heading)) %>"><%= ERB::Util.html_escape(heading) %></th>
      <% end %>
    </tr>
  </thead>
  <tbody>
    <% rows.each do |row| %>
      <tr>
        <% row.each_with_index do |cell, i| %>
          <%# a row may have more cells than there are headings %>
          <td class="<%= ERB::Util.html_escape(slugify(header[i] || "")) %>"><%= ERB::Util.html_escape(cell) %></td>
        <% end %>
      </tr>
    <% end %>
  </tbody>
</table>
|
25
|
+
|