log_sense 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +27 -0
- data/Gemfile.lock +6 -4
- data/README.org +108 -34
- data/Rakefile +6 -6
- data/exe/log_sense +110 -39
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/aggregator.rb +191 -0
- data/lib/log_sense/apache_aggregator.rb +122 -0
- data/lib/log_sense/apache_log_line_parser.rb +23 -21
- data/lib/log_sense/apache_log_parser.rb +15 -12
- data/lib/log_sense/apache_report_shaper.rb +309 -0
- data/lib/log_sense/emitter.rb +55 -553
- data/lib/log_sense/ip_locator.rb +24 -12
- data/lib/log_sense/options_checker.rb +24 -0
- data/lib/log_sense/options_parser.rb +81 -51
- data/lib/log_sense/rails_aggregator.rb +69 -0
- data/lib/log_sense/rails_log_parser.rb +82 -68
- data/lib/log_sense/rails_report_shaper.rb +183 -0
- data/lib/log_sense/report_shaper.rb +105 -0
- data/lib/log_sense/templates/_cdn_links.html.erb +11 -0
- data/lib/log_sense/templates/_command_invocation.html.erb +4 -0
- data/lib/log_sense/templates/_log_structure.html.erb +7 -1
- data/lib/log_sense/templates/_output_table.html.erb +6 -2
- data/lib/log_sense/templates/_rails.css.erb +7 -0
- data/lib/log_sense/templates/_summary.html.erb +9 -7
- data/lib/log_sense/templates/_summary.txt.erb +2 -2
- data/lib/log_sense/templates/{rails.html.erb → report_html.erb} +19 -37
- data/lib/log_sense/templates/{apache.txt.erb → report_txt.erb} +1 -1
- data/lib/log_sense/version.rb +1 -1
- data/lib/log_sense.rb +19 -9
- data/log_sense.gemspec +1 -1
- data/{apache-screenshot.png → screenshots/apache-screenshot.png} +0 -0
- data/screenshots/rails-screenshot.png +0 -0
- metadata +17 -11
- data/lib/log_sense/apache_data_cruncher.rb +0 -147
- data/lib/log_sense/rails_data_cruncher.rb +0 -141
- data/lib/log_sense/templates/apache.html.erb +0 -115
- data/lib/log_sense/templates/rails.txt.erb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d269dedfbb6ec6eae3a77491cc5ec7ca6241f388f2658964541cdd3983b8298
|
4
|
+
data.tar.gz: 6f24d23c8d06430b3605aad90522e08818cd18fd6c71c5fe90823cdc9483c81e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a63f715b281101a6f61029da3e2bcf5db4d47a537af562507b0184d6c6755eed436afed729c31cc95255bf064cbe9f487917c96d78199bbee25e6d4189469951
|
7
|
+
data.tar.gz: 77c62a24c3c81067dd0288ad606b732e0f112d0d1710b326be9e894aa72a93db4cb0a188c93b54ada728c2eeb0cf7378fb7033e13982c9a0932414c8455d2703
|
data/CHANGELOG.org
CHANGED
@@ -2,6 +2,33 @@
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
3
3
|
#+STARTUP: showall
|
4
4
|
|
5
|
+
* 1.6.0
|
6
|
+
|
7
|
+
- [User] New output format =ufw= generates directives to blacklist IPs
|
8
|
+
requesting URLs matching a pattern. For users of the Uncomplicated
|
9
|
+
Firewall.
|
10
|
+
- [User] new option =--no-geo= skips geolocation, which is terribly
|
11
|
+
costly in the current implementation.
|
12
|
+
- [User] Updated DB-IP country file to Dec 2022 version.
|
13
|
+
- [User] Changed name of SQLite output format to sqlite3
|
14
|
+
- [User] It is now possible to start analysis from a sqlite3 DB
|
15
|
+
generated by log_sense, breaking parsing and generation in two
|
16
|
+
steps.
|
17
|
+
- [User] Check for correctness of I/O formats before launching
|
18
|
+
analysis
|
19
|
+
- [User] Streak report has been renamed Session. Limited the number
|
20
|
+
of URLs shown in each session, to avoid buffer?/memory overflows
|
21
|
+
when an IP requests a massive amount of URLs.
|
22
|
+
- [User] Added an IP-per-hour visits report.
|
23
|
+
- [Code] A rather extensive refactoring of the source code to
|
24
|
+
remove code duplications and improve code structure.
|
25
|
+
- [Code] Rubocop-ped various files
|
26
|
+
- [Code] Added text renderer to DataTable, which sanitizes input and
|
27
|
+
further reduces risks of XSS and log poisoning attacks
|
28
|
+
- [Code] CDN links have been ported into the Emitter module and used
|
29
|
+
in the Embedded Ruby Templates (erbs). This simplifies version
|
30
|
+
updates of Javascript libraries used in reports.
|
31
|
+
|
5
32
|
* 1.5.2
|
6
33
|
|
7
34
|
- [User] Updated DB-IP country file.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
log_sense (1.5.
|
4
|
+
log_sense (1.5.3)
|
5
5
|
browser
|
6
6
|
ipaddr
|
7
7
|
iso_country_codes
|
@@ -16,18 +16,20 @@ GEM
|
|
16
16
|
irb (>= 1.3.6)
|
17
17
|
reline (>= 0.3.1)
|
18
18
|
io-console (0.5.11)
|
19
|
-
ipaddr (1.2.
|
19
|
+
ipaddr (1.2.5)
|
20
20
|
irb (1.4.1)
|
21
21
|
reline (>= 0.3.0)
|
22
22
|
iso_country_codes (0.7.8)
|
23
|
+
mini_portile2 (2.8.0)
|
23
24
|
minitest (5.15.0)
|
24
25
|
rake (12.3.3)
|
25
26
|
reline (0.3.1)
|
26
27
|
io-console (~> 0.5)
|
27
|
-
sqlite3 (1.
|
28
|
+
sqlite3 (1.5.4)
|
29
|
+
mini_portile2 (~> 2.8.0)
|
28
30
|
terminal-table (3.0.2)
|
29
31
|
unicode-display_width (>= 1.1.1, < 3)
|
30
|
-
unicode-display_width (2.
|
32
|
+
unicode-display_width (2.3.0)
|
31
33
|
|
32
34
|
PLATFORMS
|
33
35
|
ruby
|
data/README.org
CHANGED
@@ -9,7 +9,7 @@ Rails logs. Written in Ruby, it runs from the command line, it is
|
|
9
9
|
fast, and it can be installed on any system with a relatively recent
|
10
10
|
version of Ruby. We tested on Ruby 2.6.9, Ruby 3.0.x and later.
|
11
11
|
|
12
|
-
LogSense reports the following data:
|
12
|
+
When generating reports, LogSense reports the following data:
|
13
13
|
|
14
14
|
- Visitors, hits, unique visitors, bandwidth used
|
15
15
|
- Most accessed HTML pages
|
@@ -22,18 +22,49 @@ LogSense reports the following data:
|
|
22
22
|
- IP Country location, thanks to the DB-IP lite country DB
|
23
23
|
- Streaks: resources accessed by a given IP over time
|
24
24
|
- Performance of Rails requests
|
25
|
+
|
26
|
+
A special output format =ufw= generates rules for the [[https://launchpad.net/ufw][Uncomplicated
|
27
|
+
Firewall]] to blacklist IPs requesting URLs matching a specific pattern.
|
25
28
|
|
26
29
|
Filters from the command line allow analyzing specific periods and
|
27
30
|
distinguish traffic generated by self polls and crawlers.
|
28
31
|
|
29
|
-
LogSense generates HTML, txt, and SQLite outputs.
|
32
|
+
LogSense generates HTML, txt, ufw, and SQLite outputs.
|
30
33
|
|
31
|
-
|
34
|
+
** Apache Report Structure
|
32
35
|
|
33
36
|
#+ATTR_HTML: :width 80%
|
34
|
-
[[file:./apache-screenshot.png]]
|
37
|
+
[[file:./screenshots/apache-screenshot.png]]
|
38
|
+
|
39
|
+
|
40
|
+
** Rails Report Structure
|
41
|
+
|
42
|
+
#+ATTR_HTML: :width 80%
|
43
|
+
[[file:./screenshots/rails-screenshot.png]]
|
44
|
+
|
45
|
+
|
46
|
+
** UFW Report
|
35
47
|
|
48
|
+
The output format =ufw= generates directives for Uncomplicated
|
49
|
+
Firewall blacklisting IPs requesting URLs matching a given pattern.
|
36
50
|
|
51
|
+
We use it to blacklist IPs requesting WordPress login pages on our
|
52
|
+
websites... since we don't use WordPress for our websites.
|
53
|
+
|
54
|
+
*Example*
|
55
|
+
|
56
|
+
#+begin_src
|
57
|
+
$ log_sense -f apache -t ufw -i apache.log
|
58
|
+
# /users/sign_in/xmlrpc.php?rsd
|
59
|
+
ufw deny from 20.212.3.206
|
60
|
+
|
61
|
+
# /wp-login.php /wordpress/wp-login.php /blog/wp-login.php /wp/wp-login.php
|
62
|
+
ufw deny from 185.255.134.18
|
63
|
+
|
64
|
+
...
|
65
|
+
#+end_src
|
66
|
+
|
67
|
+
|
37
68
|
* An important word of warning
|
38
69
|
|
39
70
|
[[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with invalidated
|
@@ -48,9 +79,10 @@ opened or code executed.
|
|
48
79
|
* Motivation
|
49
80
|
|
50
81
|
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
|
51
|
-
strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]],
|
52
|
-
*privacy* and *data-ownership*: the data generated by
|
53
|
-
stored on your computer and owned by you (like it should
|
82
|
+
strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]], both
|
83
|
+
focusing on *privacy* and *data-ownership*: the data generated by
|
84
|
+
LogSense is stored on your computer and owned by you (like it should
|
85
|
+
be)[fn:1].
|
54
86
|
|
55
87
|
LogSense is also inspired by *static websites generators*: statistics
|
56
88
|
are generated from the command line and accessed as static HTML files.
|
@@ -76,33 +108,30 @@ generated files are then made available on a private area on the web.
|
|
76
108
|
#+RESULTS:
|
77
109
|
#+begin_example
|
78
110
|
Usage: log_sense [options] [logfile ...]
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
- sqlite
|
104
|
-
- html
|
105
|
-
- txt
|
111
|
+
--title=TITLE Title to use in the report
|
112
|
+
-f, --input-format=FORMAT Input format (either rails or apache)
|
113
|
+
-i, --input-files=file,file, Input files (can also be passed directly)
|
114
|
+
-t, --output-format=FORMAT Output format: html, org, txt, sqlite.
|
115
|
+
-o, --output-file=OUTPUT_FILE Output file
|
116
|
+
-b, --begin=DATE Consider entries after or on DATE
|
117
|
+
-e, --end=DATE Consider entries before or on DATE
|
118
|
+
-l, --limit=N Limit to the N most requested resources (defaults to 100)
|
119
|
+
-w, --width=WIDTH Maximum width of long columns in textual reports
|
120
|
+
-r, --rows=ROWS Maximum number of rows for columns with multiple entries in textual reports
|
121
|
+
-p, --pattern=PATTERN Pattern to use with ufw report to decide IP to blacklist
|
122
|
+
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
123
|
+
--no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
|
124
|
+
-n, --no-geog Do not geolocate entries
|
125
|
+
--verbose Inform about progress (output to STDERR)
|
126
|
+
-v, --version Prints version information
|
127
|
+
-h, --help Prints this help
|
128
|
+
|
129
|
+
This is version 1.6.0
|
130
|
+
|
131
|
+
Output formats:
|
132
|
+
|
133
|
+
- rails: txt, html, sqlite3, ufw
|
134
|
+
- apache: txt, html, sqlite3, ufw
|
106
135
|
#+end_example
|
107
136
|
|
108
137
|
Examples:
|
@@ -112,6 +141,51 @@ log_sense -f apache -i access.log -t txt > access-data.txt
|
|
112
141
|
log_sense -f rails -i production.log -t html -o performance.html
|
113
142
|
#+end_example
|
114
143
|
|
144
|
+
* Code Structure
|
145
|
+
|
146
|
+
The code implements a pipeline, with the following steps:
|
147
|
+
|
148
|
+
1. *Parser:* parses a log to a SQLite3 database. The database
|
149
|
+
contains a table with a list of events, and, in the case of Rails
|
150
|
+
report, a table with the errors.
|
151
|
+
2. *Aggregator:* takes as input a SQLite DB and aggregates data,
|
152
|
+
typically performing "group by", which are simpler to generate in
|
153
|
+
Ruby, rather than in SQL. The module outputs a Hash, with
|
154
|
+
different reporting data.
|
155
|
+
3. *GeoLocator:* adds country information to all the reporting data
|
156
|
+
which has an IP as one of the fields.
|
157
|
+
4. *Shaper:* makes (geolocated) aggregated data (e.g. Hashes and
|
158
|
+
such), into Array of Arrays, simplifying the structure of the code
|
159
|
+
building the reports.
|
160
|
+
5. *Emitter:* generates reports from shaped data using ERB.
|
161
|
+
|
162
|
+
The architecture and the structure of the code are far from being nice,
|
163
|
+
for historical reasons and for a bunch of small differences existing
|
164
|
+
between the input and the outputs to be generated. This usually ends
|
165
|
+
up with modifications to the code that have to be replicated in
|
166
|
+
different parts of the code and in interferences.
|
167
|
+
|
168
|
+
Among the points I would like to address:
|
169
|
+
|
170
|
+
- The execution pipeline in the main script has a few exceptions to
|
171
|
+
manage SQLite reading/dumping and ufw report. A linear structure
|
172
|
+
would be a lot nicer.
|
173
|
+
- Two different classes are defined for steps 1, 2, and 4, to manage,
|
174
|
+
respectively, Apache and Rails logs. These classes inherit from a
|
175
|
+
common ancestor (e.g. ApacheParser and RailsParser both inherit from
|
176
|
+
Parser), but there is still too little code shared. A nicer
|
177
|
+
approach would be that of identifying a common DB structure and
|
178
|
+
unify the pipeline up to (or including) the generation of
|
179
|
+
reports. There are a bunch of small different things to highlight in
|
180
|
+
reports, which still make this difficult. For instance, the country
|
181
|
+
report for Apache reports size of TX data, which is not available
|
182
|
+
for Rails reports.
|
183
|
+
- Geolocation could become a lot more efficient if performed in
|
184
|
+
SQLite, rather than in Ruby
|
185
|
+
- The distinction between Aggregation, Shaping, and Emission is too
|
186
|
+
fine-grained and it would be nice to be able to cleanly remove one
|
187
|
+
of the steps.
|
188
|
+
|
115
189
|
|
116
190
|
* Change Log
|
117
191
|
|
data/Rakefile
CHANGED
@@ -9,18 +9,18 @@ end
|
|
9
9
|
require_relative './lib/log_sense/ip_locator.rb'
|
10
10
|
|
11
11
|
desc "Convert Geolocation DB to sqlite"
|
12
|
-
task :
|
13
|
-
filename =
|
12
|
+
task :dbip, [:filename] do |tasks, args|
|
13
|
+
filename = args[:filename]
|
14
14
|
|
15
15
|
if !File.exist? filename
|
16
16
|
puts "Error. Could not find: #{filename}"
|
17
17
|
puts
|
18
18
|
puts 'I see the following files:'
|
19
19
|
puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
|
20
|
-
puts
|
21
|
-
puts
|
22
|
-
puts
|
23
|
-
puts
|
20
|
+
puts
|
21
|
+
puts "1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite"
|
22
|
+
puts "2. Save downloaded file to ip_locations/"
|
23
|
+
puts "3. Relaunch with YYYY-MM"
|
24
24
|
|
25
25
|
exit
|
26
26
|
else
|
data/exe/log_sense
CHANGED
@@ -1,82 +1,153 @@
|
|
1
1
|
#!/usr/bin/env ruby

# Command line entry point: parses the options, turns the input (log files,
# STDIN or a previously saved SQLite3 DB) into a SQLite3 DB and then either
# dumps the DB, prints ufw rules, or aggregates/shapes/emits a report.
# Progress and errors go to STDERR; the actual output goes to STDOUT.

require "log_sense"
require "sqlite3"

#
# Parse Command Line Arguments
#

# this better be here... OptionsParser consumes ARGV
@command_line = ARGV.join(" ")
@options = LogSense::OptionsParser.parse ARGV
@input_filenames = @options[:input_filenames] + ARGV
@output_filename = @options[:output_filename]

#
# Check correctness of input data.
#

#
# Check input files
#
@non_existing = @input_filenames.reject { |x| File.exist?(x) }

if @non_existing.any?
  warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
  exit 1
end

#
# Special condition: sqlite3 requires a single file as input
#
# File.extname returns the extension *with* its leading dot, hence ".sqlite3".
sqlite3_input = !@input_filenames.empty? &&
                File.extname(@input_filenames.first) == ".sqlite3"

if sqlite3_input && @input_filenames.size > 1
  warn "Error: you can pass only one sqlite3 file as input"
  exit 1
end

#
# Supported input/output chains
#
iformat = @options[:input_format]
oformat = @options[:output_format]

unless LogSense::OptionsChecker.compatible?(iformat, oformat)
  warn "Error: don't know how to make #{iformat} into #{oformat}."
  warn "Possible transformation chains:"
  warn LogSense::OptionsChecker.chains_to_s
  exit 1
end

#
# Do the work
#

@started_at = Time.now

if sqlite3_input
  warn "Reading SQLite3 DB ..." if @options[:verbose]
  @db = SQLite3::Database.open @input_filenames.first
else
  warn "Parsing ..." if @options[:verbose]
  @input_files = if @input_filenames.empty?
                   [$stdin]
                 else
                   @input_filenames.map { |fname| File.open(fname, "r") }
                 end
  # The parser class is derived from the input format name
  # (e.g. "apache" -> LogSense::ApacheLogParser).
  class_name = "LogSense::#{@options[:input_format].capitalize}LogParser"
  parser_class = Object.const_get class_name
  parser = parser_class.new
  @db = parser.parse @input_files
end

if @options[:output_format] == "sqlite3"
  warn "Saving SQLite3 DB ..." if @options[:verbose]

  # Copy the whole working DB into the output file in a single backup step.
  ddb = SQLite3::Database.new(@output_filename || "db.sqlite3")
  b = SQLite3::Backup.new(ddb, "main", @db, "main")
  b.step(-1) #=> DONE
  b.finish

  exit 0
elsif @options[:output_format] == "ufw"
  pattern = @options[:pattern] || "php"

  # The pattern comes from the command line: bind it instead of splicing it
  # into the SQL text, so that quotes in the pattern cannot break the query.
  query = if @options[:input_format] == "rails"
            "select distinct event.ip,event.url
             from error join event
             where event.log_id = error.log_id and
                   event.url like ?"
          else
            "select distinct ip,path from logline
             where path like ?"
          end

  ips = @db.execute query, ["%#{pattern}%"]

  # ip => list of URLs requested by that ip
  ips_and_urls = ips.group_by(&:first).transform_values do |rows|
    rows.flat_map { |row| row[1..-1] }
  end

  ips_and_urls.each do |ip, urls|
    # limit the URLs listed in the comment line of each rule
    puts "# #{urls[0..10].uniq.join(' ')}"
    puts "ufw deny from #{ip}"
    puts
  end

  exit 0
else
  warn "Aggregating data ..." if @options[:verbose]
  class_name = "LogSense::#{@options[:input_format].capitalize}Aggregator"
  aggr_class = Object.const_get class_name
  aggr = aggr_class.new(@db, @options)
  @data = aggr.aggregate

  if @options[:geolocation]
    warn "Geolocating ..." if @options[:verbose]
    @data = LogSense::IpLocator.geolocate @data

    warn "Grouping IPs by country ..." if @options[:verbose]
    # The country is taken from the last column of each row; the width is
    # read from the first row. An empty result set has no first row, so guard
    # against nil (group_by then simply yields an empty Hash).
    country_col = (@data[:ips].first || []).size - 1
    @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
  else
    @data[:countries] = {}
  end

  @ended_at = Time.now
  @duration = @ended_at - @started_at

  # NOTE(review): @input_files is only assigned when logs are parsed, so
  # log_files is nil when the input is a sqlite3 DB -- confirm the templates
  # cope with that.
  @data = @data.merge({
    command: @command_line,
    filenames: @input_filenames,
    log_files: @input_files,
    started_at: @started_at,
    ended_at: @ended_at,
    duration: @duration,
    width: @options[:width]
  })

  if @options[:verbose]
    warn "I have the following keys in data: "
    warn @data.keys.sort.map { |key| "#{key}: #{@data[key].class}" }.join("\n")
  end

  warn "Shaping data for output ..." if @options[:verbose]
  class_name = "LogSense::#{@options[:input_format].capitalize}ReportShaper"
  shaper_class = Object.const_get class_name
  shaper = shaper_class.new
  @reports = shaper.shape @data

  warn "Emitting..." if @options[:verbose]
  puts LogSense::Emitter.emit @reports, @data, @options
end
|
Binary file
|
@@ -0,0 +1,191 @@
|
|
1
|
+
module LogSense
  #
  # Query helpers shared by the log aggregators.
  #
  # The methods below read @db, @options, @table, @date_field and @url_field.
  # None of them is assigned in this class; presumably the concrete
  # aggregators set them in their own initialize (verify against the
  # subclasses).
  #
  class Aggregator
    def initialize
      # not meant to be used directly
      raise StandardError, "LogSense::Aggregator is not meant to be used directly"
    end

    protected

    # Debugging aid: echoes +query+ and returns the rows @db yields for it.
    # The echo goes to STDERR because the executable prints the report itself
    # on STDOUT.
    def logged_query(query)
      $stderr.puts query
      @db.execute query
    end

    # Computes general information about the log: first/last day, number of
    # days, number of events (overall and matching the filter), source files
    # and unique visitors. Results are stored in instance variables.
    def aggregate_log_info
      first_day_s = @db.execute "SELECT #{@date_field} from #{@table}
                                 where #{@date_field} not NULL
                                 order by #{@date_field}
                                 limit 1"
      last_day_s = @db.execute "SELECT #{@date_field} from #{@table}
                                where #{@date_field} not NULL
                                order by #{@date_field} desc
                                limit 1"

      # make first and last day into dates or nil
      @first_day = first_day_s&.first&.first ? Date.parse(first_day_s[0][0]) : nil
      @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil

      @total_days = 0
      @total_days = (@last_day - @first_day).to_i if @first_day && @last_day

      # events in the whole log, regardless of the filter
      evs = @db.execute "SELECT count(#{@date_field}) from #{@table}"
      @events_in_log = @log_size = evs[0][0]

      # events selected by the command line filter
      evs = @db.execute "SELECT count(#{@date_field}) from #{@table} where #{filter}"
      @events = evs[0][0]

      @source_files = @db.execute "SELECT distinct(source_file) from #{@table}"

      tuv = @db.execute "SELECT count(distinct(unique_visitor)) from #{@table}
                         where #{filter}"
      @total_unique_visits = tuv[0][0]

      @first_day_requested = @options[:from_date]
      @last_day_requested = @options[:to_date]

      # the analysed period is the intersection of the requested period and
      # the period actually covered by the log
      @first_day_in_analysis = date_sel @first_day_requested, @first_day, :max
      @last_day_in_analysis = date_sel @last_day_requested, @last_day, :min

      @total_days_in_analysis = 0
      if @first_day_in_analysis && @last_day_in_analysis
        diff = (@last_day_in_analysis - @first_day_in_analysis).to_i
        @total_days_in_analysis = diff
      end
    end

    # Counts events per status and, per day, per status class (2xx..5xx).
    def aggregate_statuses
      @statuses = @db.execute %(SELECT status, count(status) from #{@table}
                                where #{filter}
                                group by status
                                order by status)

      @by_day_5xx = @db.execute status_query(5)
      @by_day_4xx = @db.execute status_query(4)
      @by_day_3xx = @db.execute status_query(3)
      @by_day_2xx = @db.execute status_query(2)

      # One row per day: [day, count, count, ...], counts in 2xx..5xx order.
      # NOTE(review): a day with no events for some status class gets fewer
      # columns, so columns are not positionally aligned -- confirm consumers.
      all_statuses = @by_day_2xx + @by_day_3xx + @by_day_4xx + @by_day_5xx
      @statuses_by_day = all_statuses.group_by { |x| x[0] }.map do |day, rows|
        [day, rows.map { |row| row[1] }].flatten
      end
    end

    # Computes the most active IPs and the number of requests per IP grouped
    # by hour, day and week, plus the detailed list of URLs per IP and day.
    def aggregate_ips
      # unique visitors and transferred size are requested only when the
      # table is "LogLine"
      extra_cols = if @table == "LogLine"
                     ", count(distinct(unique_visitor)), #{human_readable_size}"
                   else
                     ""
                   end

      @ips = @db.execute %(SELECT ip, count(ip) #{extra_cols} from #{@table}
                           where #{filter}
                           group by ip
                           order by count(ip) desc
                           limit #{@options[:limit]}).gsub("\n", "")

      @ips_per_hour = @db.execute ip_by_time_query("hour", "%H")
      @ips_per_day = @db.execute ip_by_time_query("day", "%Y-%m-%d")
      @ips_per_week = @db.execute ip_by_time_query("week", "%Y-%W")

      # SQL string literals use single quotes: double-quoted literals are a
      # legacy SQLite extension that can be disabled at compile time.
      @ips_per_day_detailed = @db.execute %(
        SELECT ip,
               strftime('%Y-%m-%d', #{@date_field}) as day,
               #{@url_field}
        from #{@table}
        where #{filter} and ip != '' and #{@url_field} != '' and
              #{@date_field} != ''
        order by ip, #{@date_field}).gsub("\n", "")
    end

    # Returns a Hash mapping each instance variable name (as a Symbol, without
    # the leading "@") to its current value.
    def instance_vars_to_hash
      instance_variables.each_with_object({}) do |variable, data|
        data[variable.to_s.delete_prefix("@").to_sym] = instance_variable_get(variable)
      end
    end

    # Returns the SQL expression (aliased as "size") rendering sum(size) with
    # a B/KB/MB/GB/TB unit.
    def human_readable_size
      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      %(CASE
        WHEN sum(size) < 1024 THEN sum(size) || ' B'
        WHEN sum(size) >= 1024 AND sum(size) < (#{mega})
          THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
        WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga})
          THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
        WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera})
          THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
        WHEN sum(size) >= (#{tera})
          THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
        END AS size).gsub("\n", "")
    end

    # Returns the SQL expression (aliased as "dow") mapping the date field to
    # the English name of the day of the week.
    def human_readable_day
      %(case cast (strftime('%w', #{@date_field}) as integer)
        when 0 then 'Sunday'
        when 1 then 'Monday'
        when 2 then 'Tuesday'
        when 3 then 'Wednesday'
        when 4 then 'Thursday'
        when 5 then 'Friday'
        when 6 then 'Saturday'
        else 'not specified'
        end as dow).gsub("\n", "")
    end

    #
    # generate the where clause corresponding to the command line options to filter data
    #
    def filter
      from = @options[:from_date]
      to = @options[:to_date]

      # the trailing "true" keeps the clause valid when no option is set
      [
        (from ? "date(#{@date_field}) >= '#{from}'" : nil),
        (to ? "date(#{@date_field}) <= '#{to}'" : nil),
        (@options[:only_crawlers] ? "bot == 1" : nil),
        (@options[:ignore_crawlers] ? "bot == 0" : nil),
        (@options[:no_selfpolls] ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "
    end

    private

    # given 5 builds the query to get all lines with status 5xx
    def status_query(status)
      %(SELECT date(#{@date_field}), count(#{@date_field}) from #{@table}
        where substr(status, 1,1) == '#{status}' and #{filter}
        group by date(#{@date_field})).gsub("\n", "")
    end

    # given format string, group ip by time formatted with format string
    # (e.g. by hour if format string is "%H")
    # name is used to give the name to the column with formatted time
    def ip_by_time_query(name, format_string)
      # format_string must reach strftime: a fixed '%H' would bucket the
      # per-day and per-week reports by hour as well
      %(SELECT ip,
               strftime('#{format_string}', #{@date_field}) as #{name},
               count(#{@url_field}) from #{@table}
        where #{filter} and ip != '' and
              #{@url_field} != '' and
              #{@date_field} != ''
        group by ip, #{name}
        order by ip, #{@date_field}).gsub("\n", "")
    end

    # Picks between two optional dates: when both are given applies +method+
    # (:min or :max) to the pair, otherwise returns whichever is present
    # (nil when neither is).
    def date_sel(date1, date2, method)
      return date1 || date2 unless date1 && date2

      [date1, date2].send(method)
    end
  end
end
|