log_sense 1.5.2 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.org +27 -0
- data/Gemfile.lock +6 -4
- data/README.org +108 -34
- data/Rakefile +6 -6
- data/exe/log_sense +110 -39
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/aggregator.rb +191 -0
- data/lib/log_sense/apache_aggregator.rb +122 -0
- data/lib/log_sense/apache_log_line_parser.rb +23 -21
- data/lib/log_sense/apache_log_parser.rb +15 -12
- data/lib/log_sense/apache_report_shaper.rb +309 -0
- data/lib/log_sense/emitter.rb +55 -553
- data/lib/log_sense/ip_locator.rb +24 -12
- data/lib/log_sense/options_checker.rb +24 -0
- data/lib/log_sense/options_parser.rb +81 -51
- data/lib/log_sense/rails_aggregator.rb +69 -0
- data/lib/log_sense/rails_log_parser.rb +82 -68
- data/lib/log_sense/rails_report_shaper.rb +183 -0
- data/lib/log_sense/report_shaper.rb +105 -0
- data/lib/log_sense/templates/_cdn_links.html.erb +11 -0
- data/lib/log_sense/templates/_command_invocation.html.erb +4 -0
- data/lib/log_sense/templates/_log_structure.html.erb +7 -1
- data/lib/log_sense/templates/_output_table.html.erb +6 -2
- data/lib/log_sense/templates/_rails.css.erb +7 -0
- data/lib/log_sense/templates/_summary.html.erb +9 -7
- data/lib/log_sense/templates/_summary.txt.erb +2 -2
- data/lib/log_sense/templates/{rails.html.erb → report_html.erb} +19 -37
- data/lib/log_sense/templates/{apache.txt.erb → report_txt.erb} +1 -1
- data/lib/log_sense/version.rb +1 -1
- data/lib/log_sense.rb +19 -9
- data/log_sense.gemspec +1 -1
- data/{apache-screenshot.png → screenshots/apache-screenshot.png} +0 -0
- data/screenshots/rails-screenshot.png +0 -0
- metadata +17 -11
- data/lib/log_sense/apache_data_cruncher.rb +0 -147
- data/lib/log_sense/rails_data_cruncher.rb +0 -141
- data/lib/log_sense/templates/apache.html.erb +0 -115
- data/lib/log_sense/templates/rails.txt.erb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d269dedfbb6ec6eae3a77491cc5ec7ca6241f388f2658964541cdd3983b8298
|
4
|
+
data.tar.gz: 6f24d23c8d06430b3605aad90522e08818cd18fd6c71c5fe90823cdc9483c81e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a63f715b281101a6f61029da3e2bcf5db4d47a537af562507b0184d6c6755eed436afed729c31cc95255bf064cbe9f487917c96d78199bbee25e6d4189469951
|
7
|
+
data.tar.gz: 77c62a24c3c81067dd0288ad606b732e0f112d0d1710b326be9e894aa72a93db4cb0a188c93b54ada728c2eeb0cf7378fb7033e13982c9a0932414c8455d2703
|
data/CHANGELOG.org
CHANGED
@@ -2,6 +2,33 @@
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
3
3
|
#+STARTUP: showall
|
4
4
|
|
5
|
+
* 1.6.0
|
6
|
+
|
7
|
+
- [User] New output format =ufw= generates directives to blacklist IPs
|
8
|
+
requesting URLs matching a pattern. For users of the Uncomplicated
|
9
|
+
Firewall.
|
10
|
+
- [User] new option =--no-geo= skips geolocation, which is terribly
|
11
|
+
costly in the current implementation.
|
12
|
+
- [User] Updated DB-IP country file to Dec 2022 version.
|
13
|
+
- [User] Changed name of SQLite output format to sqlite3
|
14
|
+
- [User] It is now possible to start analysis from a sqlite3 DB
|
15
|
+
generated by log_sense, breaking parsing and generation in two
|
16
|
+
steps.
|
17
|
+
- [User] Check for correctness of I/O formats before launching
|
18
|
+
analysis
|
19
|
+
- [User] Streak report has been renamed Session. Limited the number
|
20
|
+
of URLs shown in each session, to avoid buffer?/memory overflows
|
21
|
+
when an IP requests a massive amount of URLs.
|
22
|
+
- [User] Added an IP-per-hour visits report.
|
23
|
+
- [Code] A rather extensive refactoring of the source code to
|
24
|
+
remove code duplications and improve code structure.
|
25
|
+
- [Code] Rubocop-ped various files
|
26
|
+
- [Code] Added text renderer to DataTable, which sanitizes input and
|
27
|
+
further reduces risks of XSS and log poisoning attacks
|
28
|
+
- [Code] CDN links have been ported into the Emitter module and used
|
29
|
+
in the Embedded Ruby Templates (erbs). This simplifies version
|
30
|
+
updates of Javascript libraries used in reports.
|
31
|
+
|
5
32
|
* 1.5.2
|
6
33
|
|
7
34
|
- [User] Updated DB-IP country file.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
log_sense (1.5.
|
4
|
+
log_sense (1.5.3)
|
5
5
|
browser
|
6
6
|
ipaddr
|
7
7
|
iso_country_codes
|
@@ -16,18 +16,20 @@ GEM
|
|
16
16
|
irb (>= 1.3.6)
|
17
17
|
reline (>= 0.3.1)
|
18
18
|
io-console (0.5.11)
|
19
|
-
ipaddr (1.2.
|
19
|
+
ipaddr (1.2.5)
|
20
20
|
irb (1.4.1)
|
21
21
|
reline (>= 0.3.0)
|
22
22
|
iso_country_codes (0.7.8)
|
23
|
+
mini_portile2 (2.8.0)
|
23
24
|
minitest (5.15.0)
|
24
25
|
rake (12.3.3)
|
25
26
|
reline (0.3.1)
|
26
27
|
io-console (~> 0.5)
|
27
|
-
sqlite3 (1.
|
28
|
+
sqlite3 (1.5.4)
|
29
|
+
mini_portile2 (~> 2.8.0)
|
28
30
|
terminal-table (3.0.2)
|
29
31
|
unicode-display_width (>= 1.1.1, < 3)
|
30
|
-
unicode-display_width (2.
|
32
|
+
unicode-display_width (2.3.0)
|
31
33
|
|
32
34
|
PLATFORMS
|
33
35
|
ruby
|
data/README.org
CHANGED
@@ -9,7 +9,7 @@ Rails logs. Written in Ruby, it runs from the command line, it is
|
|
9
9
|
fast, and it can be installed on any system with a relatively recent
|
10
10
|
version of Ruby. We tested on Ruby 2.6.9, Ruby 3.0.x and later.
|
11
11
|
|
12
|
-
LogSense reports the following data:
|
12
|
+
When generating reports, LogSense reports the following data:
|
13
13
|
|
14
14
|
- Visitors, hits, unique visitors, bandwidth used
|
15
15
|
- Most accessed HTML pages
|
@@ -22,18 +22,49 @@ LogSense reports the following data:
|
|
22
22
|
- IP Country location, thanks to the DB-IP lite country DB
|
23
23
|
- Streaks: resources accessed by a given IP over time
|
24
24
|
- Performance of Rails requests
|
25
|
+
|
26
|
+
A special output format =ufw= generates rules for the [[https://launchpad.net/ufw][Uncomplicated
|
27
|
+
Firewall]] to blacklist IPs requesting URLs matching a specific pattern.
|
25
28
|
|
26
29
|
Filters from the command line allow to analyze specific periods and
|
27
30
|
distinguish traffic generated by self polls and crawlers.
|
28
31
|
|
29
|
-
LogSense generates HTML, txt, and SQLite outputs.
|
32
|
+
LogSense generates HTML, txt, ufw, and SQLite outputs.
|
30
33
|
|
31
|
-
|
34
|
+
** Apache Report Structure
|
32
35
|
|
33
36
|
#+ATTR_HTML: :width 80%
|
34
|
-
[[file:./apache-screenshot.png]]
|
37
|
+
[[file:./screenshots/apache-screenshot.png]]
|
38
|
+
|
39
|
+
|
40
|
+
** Rails Report Structure
|
41
|
+
|
42
|
+
#+ATTR_HTML: :width 80%
|
43
|
+
[[file:./screenshots/rails-screenshot.png]]
|
44
|
+
|
45
|
+
|
46
|
+
** UFW Report
|
35
47
|
|
48
|
+
The output format =ufw= generates directives for Uncomplicated
|
49
|
+
Firewall blacklisting IPs requesting URLs matching a given pattern.
|
36
50
|
|
51
|
+
We use it to blacklist IPs requesting WordPress login pages on our
|
52
|
+
websites... since we don't use WordPress for our websites.
|
53
|
+
|
54
|
+
*Example*
|
55
|
+
|
56
|
+
#+begin_src
|
57
|
+
$ log_sense -f apache -t ufw -i apache.log
|
58
|
+
# /users/sign_in/xmlrpc.php?rsd
|
59
|
+
ufw deny from 20.212.3.206
|
60
|
+
|
61
|
+
# /wp-login.php /wordpress/wp-login.php /blog/wp-login.php /wp/wp-login.php
|
62
|
+
ufw deny from 185.255.134.18
|
63
|
+
|
64
|
+
...
|
65
|
+
#+end_src
|
66
|
+
|
67
|
+
|
37
68
|
* An important word of warning
|
38
69
|
|
39
70
|
[[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with invalidated
|
@@ -48,9 +79,10 @@ opened or code executed.
|
|
48
79
|
* Motivation
|
49
80
|
|
50
81
|
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
|
51
|
-
strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]],
|
52
|
-
*privacy* and *data-ownership*: the data generated by
|
53
|
-
stored on your computer and owned by you (like it should
|
82
|
+
strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]], both
|
83
|
+
focusing on *privacy* and *data-ownership*: the data generated by
|
84
|
+
LogSense is stored on your computer and owned by you (like it should
|
85
|
+
be)[fn:1].
|
54
86
|
|
55
87
|
LogSense is also inspired by *static websites generators*: statistics
|
56
88
|
are generated from the command line and accessed as static HTML files.
|
@@ -76,33 +108,30 @@ generated files are then made available on a private area on the web.
|
|
76
108
|
#+RESULTS:
|
77
109
|
#+begin_example
|
78
110
|
Usage: log_sense [options] [logfile ...]
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
- sqlite
|
104
|
-
- html
|
105
|
-
- txt
|
111
|
+
--title=TITLE Title to use in the report
|
112
|
+
-f, --input-format=FORMAT Input format (either rails or apache)
|
113
|
+
-i, --input-files=file,file, Input files (can also be passed directly)
|
114
|
+
-t, --output-format=FORMAT Output format: html, org, txt, sqlite.
|
115
|
+
-o, --output-file=OUTPUT_FILE Output file
|
116
|
+
-b, --begin=DATE Consider entries after or on DATE
|
117
|
+
-e, --end=DATE Consider entries before or on DATE
|
118
|
+
-l, --limit=N Limit to the N most requested resources (defaults to 100)
|
119
|
+
-w, --width=WIDTH Maximum width of long columns in textual reports
|
120
|
+
-r, --rows=ROWS Maximum number of rows for columns with multiple entries in textual reports
|
121
|
+
-p, --pattern=PATTERN Pattern to use with ufw report to decide IP to blacklist
|
122
|
+
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
123
|
+
--no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
|
124
|
+
-n, --no-geog Do not geolocate entries
|
125
|
+
--verbose Inform about progress (output to STDERR)
|
126
|
+
-v, --version Prints version information
|
127
|
+
-h, --help Prints this help
|
128
|
+
|
129
|
+
This is version 1.6.0
|
130
|
+
|
131
|
+
Output formats:
|
132
|
+
|
133
|
+
- rails: txt, html, sqlite3, ufw
|
134
|
+
- apache: txt, html, sqlite3, ufw
|
106
135
|
#+end_example
|
107
136
|
|
108
137
|
Examples:
|
@@ -112,6 +141,51 @@ log_sense -f apache -i access.log -t txt > access-data.txt
|
|
112
141
|
log_sense -f rails -i production.log -t html -o performance.html
|
113
142
|
#+end_example
|
114
143
|
|
144
|
+
* Code Structure
|
145
|
+
|
146
|
+
The code implements a pipeline, with the following steps:
|
147
|
+
|
148
|
+
1. *Parser:* parses a log to a SQLite3 database. The database
|
149
|
+
contains a table with a list of events, and, in the case of Rails
|
150
|
+
report, a table with the errors.
|
151
|
+
2. *Aggregator:* takes as input a SQLite DB and aggregates data,
|
152
|
+
typically performing "group by", which are simpler to generate in
|
153
|
+
Ruby, rather than in SQL. The module outputs a Hash, with
|
154
|
+
different reporting data.
|
155
|
+
3. *GeoLocator:* adds country information to all the reporting data
|
156
|
+
which has an IP as one of the fields.
|
157
|
+
4. *Shaper:* makes (geolocated) aggregated data (e.g. Hashes and
|
158
|
+
such), into Array of Arrays, simplifying the structure of the code
|
159
|
+
building the reports.
|
160
|
+
5. *Emitter:* generates reports from shaped data using ERB.
|
161
|
+
|
162
|
+
The architecture and the structure of the code are far from being nice,
|
163
|
+
for historical reasons and for a bunch of small differences existing
|
164
|
+
between the input and the outputs to be generated. This usually ends
|
165
|
+
up with modifications to the code that have to be replicated in
|
166
|
+
different parts of the code and in interferences.
|
167
|
+
|
168
|
+
Among the points I would like to address:
|
169
|
+
|
170
|
+
- The execution pipeline in the main script has a few exceptions to
|
171
|
+
manage SQLite reading/dumping and ufw report. A linear structure
|
172
|
+
would be a lot nicer.
|
173
|
+
- Two different classes are defined for steps 1, 2, and 4, to manage,
|
174
|
+
respectively, Apache and Rails logs. These classes inherit from a
|
175
|
+
common ancestor (e.g. ApacheParser and RailsParser both inherit from
|
176
|
+
Parser), but there is still too little code shared. A nicer
|
177
|
+
approach would be that of identifying a common DB structure and
|
178
|
+
unifying the pipeline up to (or including) the generation of
|
179
|
+
reports. There are a bunch of small different things to highlight in
|
180
|
+
reports, which still make this difficult. For instance, the country
|
181
|
+
report for Apache reports size of TX data, which is not available
|
182
|
+
for Rails reports.
|
183
|
+
- Geolocation could become a lot more efficient if performed in
|
184
|
+
SQLite, rather than in Ruby
|
185
|
+
- The distinction between Aggregation, Shaping, and Emission is too
|
186
|
+
fine-grained and it would be nice to be able to cleanly remove one
|
187
|
+
of the steps.
|
188
|
+
|
115
189
|
|
116
190
|
* Change Log
|
117
191
|
|
data/Rakefile
CHANGED
@@ -9,18 +9,18 @@ end
|
|
9
9
|
require_relative './lib/log_sense/ip_locator.rb'
|
10
10
|
|
11
11
|
desc "Convert Geolocation DB to sqlite"
|
12
|
-
task :
|
13
|
-
filename =
|
12
|
+
task :dbip, [:filename] do |tasks, args|
|
13
|
+
filename = args[:filename]
|
14
14
|
|
15
15
|
if !File.exist? filename
|
16
16
|
puts "Error. Could not find: #{filename}"
|
17
17
|
puts
|
18
18
|
puts 'I see the following files:'
|
19
19
|
puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
|
20
|
-
puts
|
21
|
-
puts
|
22
|
-
puts
|
23
|
-
puts
|
20
|
+
puts
|
21
|
+
puts "1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite"
|
22
|
+
puts "2. Save downloaded file to ip_locations/"
|
23
|
+
puts "3. Relaunch with YYYY-MM"
|
24
24
|
|
25
25
|
exit
|
26
26
|
else
|
data/exe/log_sense
CHANGED
@@ -1,82 +1,153 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
3
|
+
require "log_sense"
|
4
|
+
require "sqlite3"
|
4
5
|
|
5
6
|
#
|
6
7
|
# Parse Command Line Arguments
|
7
8
|
#
|
8
9
|
|
9
10
|
# this better be here... OptionsParser consumes ARGV
|
10
|
-
@command_line = ARGV.join(
|
11
|
-
@options
|
12
|
-
@
|
11
|
+
@command_line = ARGV.join(" ")
|
12
|
+
@options = LogSense::OptionsParser.parse ARGV
|
13
|
+
@input_filenames = @options[:input_filenames] + ARGV
|
14
|
+
@output_filename = @options[:output_filename]
|
13
15
|
|
14
16
|
#
|
15
|
-
#
|
16
|
-
#
|
17
|
+
# Check correctness of input data.
|
18
|
+
#
|
19
|
+
|
20
|
+
#
|
21
|
+
# Check input files
|
17
22
|
#
|
18
|
-
@input_filenames = @options[:input_filenames] + ARGV
|
19
23
|
@non_existing = @input_filenames.reject { |x| File.exist?(x) }
|
20
24
|
|
21
|
-
|
22
|
-
|
25
|
+
if @non_existing.any?
|
26
|
+
warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
|
27
|
+
exit 1
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Special condition: sqlite3 requires a single file as input
|
32
|
+
#
|
33
|
+
if @input_filenames.size > 0 &&
|
34
|
+
File.extname(@input_filenames.first) == "sqlite3" &&
|
35
|
+
@input_filenames.size > 1
|
36
|
+
warn "Error: you can pass only one sqlite3 file as input"
|
37
|
+
exit 1
|
38
|
+
end
|
39
|
+
|
40
|
+
#
|
41
|
+
# Supported input/output chains
|
42
|
+
#
|
43
|
+
iformat = @options[:input_format]
|
44
|
+
oformat = @options[:output_format]
|
45
|
+
|
46
|
+
if !LogSense::OptionsChecker::compatible?(iformat, oformat)
|
47
|
+
warn "Error: don't know how to make #{iformat} into #{oformat}."
|
48
|
+
warn "Possible transformation chains:"
|
49
|
+
warn LogSense::OptionsChecker.chains_to_s
|
23
50
|
exit 1
|
24
51
|
end
|
25
|
-
@input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
|
26
52
|
|
27
53
|
#
|
28
|
-
#
|
54
|
+
# Do the work
|
29
55
|
#
|
30
56
|
|
31
57
|
@started_at = Time.now
|
32
58
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
when 'rails'
|
38
|
-
parser_klass = LogSense::RailsLogParser
|
39
|
-
cruncher_klass = LogSense::RailsDataCruncher
|
59
|
+
if @input_filenames.size > 0 &&
|
60
|
+
File.extname(@input_filenames.first) == ".sqlite3"
|
61
|
+
warn "Reading SQLite3 DB ..." if @options[:verbose]
|
62
|
+
@db = SQLite3::Database.open @input_filenames.first
|
40
63
|
else
|
41
|
-
|
42
|
-
|
64
|
+
warn "Parsing ..." if @options[:verbose]
|
65
|
+
@input_files = if @input_filenames.empty?
|
66
|
+
[$stdin]
|
67
|
+
else
|
68
|
+
@input_filenames.map { |fname| File.open(fname, "r") }
|
69
|
+
end
|
70
|
+
class_name = "LogSense::#{@options[:input_format].capitalize}LogParser"
|
71
|
+
parser_class = Object.const_get class_name
|
72
|
+
parser = parser_class.new
|
73
|
+
@db = parser.parse @input_files
|
43
74
|
end
|
44
75
|
|
45
|
-
|
46
|
-
|
76
|
+
if @options[:output_format] == "sqlite3"
|
77
|
+
warn "Saving SQLite3 DB ..." if @options[:verbose]
|
47
78
|
|
48
|
-
|
49
|
-
|
50
|
-
ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
|
51
|
-
b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
|
79
|
+
ddb = SQLite3::Database.new(@output_filename || "db.sqlite3")
|
80
|
+
b = SQLite3::Backup.new(ddb, "main", @db, "main")
|
52
81
|
b.step(-1) #=> DONE
|
53
82
|
b.finish
|
83
|
+
|
84
|
+
exit 0
|
85
|
+
elsif @options[:output_format] == "ufw"
|
86
|
+
pattern = @options[:pattern] || "php"
|
87
|
+
|
88
|
+
if @options[:input_format] == "rails"
|
89
|
+
query = "select distinct event.ip,event.url
|
90
|
+
from error join event
|
91
|
+
where event.log_id = error.log_id and
|
92
|
+
event.url like '%#{pattern}%'"
|
93
|
+
else
|
94
|
+
query = "select distinct ip,path from logline
|
95
|
+
where path like '%#{pattern}%'"
|
96
|
+
end
|
97
|
+
|
98
|
+
ips = @db.execute query
|
99
|
+
ips_and_urls = ips.group_by { |x| x[0] }.transform_values { |x|
|
100
|
+
x.map { |y| y[1..-1] }.flatten
|
101
|
+
}
|
102
|
+
ips_and_urls.each do |ip, urls|
|
103
|
+
puts "# #{urls[0..10].uniq.join(' ')}"
|
104
|
+
puts "ufw deny from #{ip}"
|
105
|
+
puts
|
106
|
+
end
|
107
|
+
|
108
|
+
exit 0
|
54
109
|
else
|
55
|
-
|
56
|
-
|
110
|
+
warn "Aggregating data ..." if @options[:verbose]
|
111
|
+
class_name = "LogSense::#{@options[:input_format].capitalize}Aggregator"
|
112
|
+
aggr_class = Object.const_get class_name
|
113
|
+
aggr = aggr_class.new(@db, @options)
|
114
|
+
@data = aggr.aggregate
|
57
115
|
|
58
|
-
|
59
|
-
|
116
|
+
if @options[:geolocation]
|
117
|
+
warn "Geolocating ..." if @options[:verbose]
|
118
|
+
@data = LogSense::IpLocator.geolocate @data
|
60
119
|
|
61
|
-
|
62
|
-
|
63
|
-
|
120
|
+
warn "Grouping IPs by country ..." if @options[:verbose]
|
121
|
+
country_col = @data[:ips][0].size - 1
|
122
|
+
@data[:countries] = @data[:ips].group_by { |x| x[country_col] }
|
123
|
+
else
|
124
|
+
@data[:countries] = {}
|
125
|
+
end
|
64
126
|
|
65
127
|
@ended_at = Time.now
|
66
128
|
@duration = @ended_at - @started_at
|
67
129
|
|
68
130
|
@data = @data.merge({
|
69
131
|
command: @command_line,
|
70
|
-
filenames:
|
132
|
+
filenames: @input_filenames,
|
71
133
|
log_files: @input_files,
|
72
134
|
started_at: @started_at,
|
73
135
|
ended_at: @ended_at,
|
74
136
|
duration: @duration,
|
75
137
|
width: @options[:width]
|
76
138
|
})
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
139
|
+
|
140
|
+
if @options[:verbose]
|
141
|
+
warn "I have the following keys in data: "
|
142
|
+
warn @data.keys.sort.map { |key| "#{key}: #{@data[key].class}" }.join("\n")
|
143
|
+
end
|
144
|
+
|
145
|
+
warn "Shaping data for output ..." if @options[:verbose]
|
146
|
+
class_name = "LogSense::#{@options[:input_format].capitalize}ReportShaper"
|
147
|
+
shaper_class = Object.const_get class_name
|
148
|
+
shaper = shaper_class.new
|
149
|
+
@reports = shaper.shape @data
|
150
|
+
|
151
|
+
warn "Emitting..." if @options[:verbose]
|
152
|
+
puts LogSense::Emitter.emit @reports, @data, @options
|
82
153
|
end
|
Binary file
|
@@ -0,0 +1,191 @@
|
|
1
|
+
module LogSense
|
2
|
+
class Aggregator
|
3
|
+
def initialize
|
4
|
+
# not meant to be used directly
|
5
|
+
raise StandardError
|
6
|
+
end
|
7
|
+
|
8
|
+
protected
|
9
|
+
|
10
|
+
def logged_query(query)
|
11
|
+
puts query
|
12
|
+
@db.execute query
|
13
|
+
end
|
14
|
+
|
15
|
+
def aggregate_log_info
|
16
|
+
first_day_s = @db.execute "SELECT #{@date_field} from #{@table}
|
17
|
+
where #{@date_field} not NULL
|
18
|
+
order by #{@date_field}
|
19
|
+
limit 1"
|
20
|
+
last_day_s = @db.execute "SELECT #{@date_field} from #{@table}
|
21
|
+
where #{@date_field} not NULL
|
22
|
+
order by #{@date_field} desc
|
23
|
+
limit 1"
|
24
|
+
|
25
|
+
# make first and last day into dates or nil
|
26
|
+
@first_day = first_day_s&.first&.first ? Date.parse(first_day_s[0][0]) : nil
|
27
|
+
@last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
|
28
|
+
|
29
|
+
@total_days = 0
|
30
|
+
@total_days = (@last_day - @first_day).to_i if @first_day && @last_day
|
31
|
+
|
32
|
+
evs = @db.execute "SELECT count(#{@date_field}) from #{@table}"
|
33
|
+
@events_in_log = @log_size = evs[0][0]
|
34
|
+
|
35
|
+
evs = @db.execute "SELECT count(#{@date_field}) from #{@table} where #{filter}"
|
36
|
+
@events = evs[0][0]
|
37
|
+
|
38
|
+
@source_files = @db.execute "SELECT distinct(source_file) from #{@table}"
|
39
|
+
|
40
|
+
tuv = @db.execute "SELECT count(distinct(unique_visitor)) from #{@table}
|
41
|
+
where #{filter}"
|
42
|
+
@total_unique_visits = tuv[0][0]
|
43
|
+
|
44
|
+
@first_day_requested = @options[:from_date]
|
45
|
+
@last_day_requested = @options[:to_date]
|
46
|
+
|
47
|
+
@first_day_in_analysis = date_sel @first_day_requested, @first_day, :max
|
48
|
+
@last_day_in_analysis = date_sel @last_day_requested, @last_day, :min
|
49
|
+
|
50
|
+
@total_days_in_analysis = 0
|
51
|
+
if @first_day_in_analysis && @last_day_in_analysis
|
52
|
+
diff = (@last_day_in_analysis - @first_day_in_analysis).to_i
|
53
|
+
@total_days_in_analysis = diff
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def aggregate_statuses
|
58
|
+
@statuses = @db.execute %(SELECT status, count(status) from #{@table}
|
59
|
+
where #{filter}
|
60
|
+
group by status
|
61
|
+
order by status)
|
62
|
+
|
63
|
+
@by_day_5xx = @db.execute status_query(5)
|
64
|
+
@by_day_4xx = @db.execute status_query(4)
|
65
|
+
@by_day_3xx = @db.execute status_query(3)
|
66
|
+
@by_day_2xx = @db.execute status_query(2)
|
67
|
+
|
68
|
+
all_statuses = @by_day_2xx + @by_day_3xx + @by_day_4xx + @by_day_5xx
|
69
|
+
@statuses_by_day = all_statuses.group_by { |x| x[0] }.to_a.map { |x|
|
70
|
+
[x[0], x[1].map { |y| y[1] }].flatten
|
71
|
+
}
|
72
|
+
end
|
73
|
+
|
74
|
+
def aggregate_ips
|
75
|
+
if @table == "LogLine"
|
76
|
+
extra_cols = ", count(distinct(unique_visitor)), #{human_readable_size}"
|
77
|
+
else
|
78
|
+
extra_cols = ""
|
79
|
+
end
|
80
|
+
|
81
|
+
@ips = @db.execute %(SELECT ip, count(ip) #{extra_cols} from #{@table}
|
82
|
+
where #{filter}
|
83
|
+
group by ip
|
84
|
+
order by count(ip) desc
|
85
|
+
limit #{@options[:limit]}).gsub("\n", "")
|
86
|
+
|
87
|
+
@ips_per_hour = @db.execute ip_by_time_query("hour", "%H")
|
88
|
+
@ips_per_day = @db.execute ip_by_time_query("day", "%Y-%m-%d")
|
89
|
+
@ips_per_week = @db.execute ip_by_time_query("week", "%Y-%W")
|
90
|
+
|
91
|
+
@ips_per_day_detailed = @db.execute %(
|
92
|
+
SELECT ip,
|
93
|
+
strftime("%Y-%m-%d", #{@date_field}) as day,
|
94
|
+
#{@url_field}
|
95
|
+
from #{@table}
|
96
|
+
where #{filter} and ip != "" and #{@url_field} != "" and
|
97
|
+
#{@date_field} != ""
|
98
|
+
order by ip, #{@date_field}).gsub("\n", "")
|
99
|
+
end
|
100
|
+
|
101
|
+
def instance_vars_to_hash
|
102
|
+
data = {}
|
103
|
+
instance_variables.each do |variable|
|
104
|
+
var_as_symbol = variable.to_s[1..].to_sym
|
105
|
+
data[var_as_symbol] = instance_variable_get(variable)
|
106
|
+
end
|
107
|
+
data
|
108
|
+
end
|
109
|
+
|
110
|
+
def human_readable_size
|
111
|
+
mega = 1024 * 1024
|
112
|
+
giga = mega * 1024
|
113
|
+
tera = giga * 1024
|
114
|
+
|
115
|
+
%(CASE
|
116
|
+
WHEN sum(size) < 1024 THEN sum(size) || ' B'
|
117
|
+
WHEN sum(size) >= 1024 AND sum(size) < (#{mega})
|
118
|
+
THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
|
119
|
+
WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga})
|
120
|
+
THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
|
121
|
+
WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera})
|
122
|
+
THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
|
123
|
+
WHEN sum(size) >= (#{tera})
|
124
|
+
THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
|
125
|
+
END AS size).gsub("\n", "")
|
126
|
+
end
|
127
|
+
|
128
|
+
def human_readable_day
|
129
|
+
%(case cast (strftime('%w', #{@date_field}) as integer)
|
130
|
+
when 0 then 'Sunday'
|
131
|
+
when 1 then 'Monday'
|
132
|
+
when 2 then 'Tuesday'
|
133
|
+
when 3 then 'Wednesday'
|
134
|
+
when 4 then 'Thursday'
|
135
|
+
when 5 then 'Friday'
|
136
|
+
when 6 then 'Saturday'
|
137
|
+
else 'not specified'
|
138
|
+
end as dow).gsub("\n", "")
|
139
|
+
end
|
140
|
+
|
141
|
+
#
|
142
|
+
# generate the where clause corresponding to the command line options to filter data
|
143
|
+
#
|
144
|
+
def filter
|
145
|
+
from = @options[:from_date]
|
146
|
+
to = @options[:to_date]
|
147
|
+
|
148
|
+
[
|
149
|
+
(from ? "date(#{@date_field}) >= '#{from}'" : nil),
|
150
|
+
(to ? "date(#{@date_field}) <= '#{to}'" : nil),
|
151
|
+
(@options[:only_crawlers] ? "bot == 1" : nil),
|
152
|
+
(@options[:ignore_crawlers] ? "bot == 0" : nil),
|
153
|
+
(@options[:no_selfpolls] ? "ip != '::1'" : nil),
|
154
|
+
"true"
|
155
|
+
].compact.join " and "
|
156
|
+
end
|
157
|
+
|
158
|
+
private
|
159
|
+
|
160
|
+
# given 5 builds the query to get all lines with status 5xx
|
161
|
+
def status_query(status)
|
162
|
+
%(SELECT date(#{@date_field}), count(#{@date_field}) from #{@table}
|
163
|
+
where substr(status, 1,1) == '#{status}' and #{filter}
|
164
|
+
group by date(#{@date_field})).gsub("\n", "")
|
165
|
+
end
|
166
|
+
|
167
|
+
# given format string, group ip by time formatted with format string
|
168
|
+
# (e.g. by hour if format string is "%H")
|
169
|
+
# name is used to give the name to the column with formatted time
|
170
|
+
def ip_by_time_query(name, format_string)
|
171
|
+
%(SELECT ip,
|
172
|
+
strftime("%H", #{@date_field}) as #{name},
|
173
|
+
count(#{@url_field}) from #{@table}
|
174
|
+
where #{filter} and ip != "" and
|
175
|
+
#{@url_field} != "" and
|
176
|
+
#{@date_field} != ""
|
177
|
+
group by ip, #{name}
|
178
|
+
order by ip, #{@date_field}).gsub("\n", "")
|
179
|
+
end
|
180
|
+
|
181
|
+
def date_sel(date1, date2, method)
|
182
|
+
if date1 && date2
|
183
|
+
[date1, date2].send(method)
|
184
|
+
elsif date1
|
185
|
+
date1
|
186
|
+
else
|
187
|
+
date2
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|