log_sense 1.6.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +17 -0
- data/Gemfile.lock +20 -14
- data/README.org +46 -46
- data/Rakefile +9 -10
- data/exe/log_sense +36 -19
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache/log_line_parser.rb +59 -0
- data/lib/log_sense/apache/log_parser.rb +101 -0
- data/lib/log_sense/apache_report_shaper.rb +267 -163
- data/lib/log_sense/emitter.rb +27 -23
- data/lib/log_sense/ip_locator.rb +8 -5
- data/lib/log_sense/options/checker.rb +26 -0
- data/lib/log_sense/options/parser.rb +170 -0
- data/lib/log_sense/rails/log_parser.rb +469 -0
- data/lib/log_sense/rails_aggregator.rb +74 -1
- data/lib/log_sense/rails_report_shaper.rb +331 -108
- data/lib/log_sense/templates/_cdn_links.html.erb +0 -4
- data/lib/log_sense/templates/_log_structure.html.erb +6 -6
- data/lib/log_sense/templates/_performance.html.erb +6 -5
- data/lib/log_sense/templates/_rails.css.erb +16 -3
- data/lib/log_sense/templates/_report_data.html.erb +1 -1
- data/lib/log_sense/templates/_stylesheet.css +45 -25
- data/lib/log_sense/templates/_summary.html.erb +8 -8
- data/lib/log_sense/templates/report_html.erb +19 -2
- data/lib/log_sense/version.rb +1 -1
- data/lib/log_sense.rb +4 -4
- metadata +8 -8
- data/lib/log_sense/apache_log_line_parser.rb +0 -57
- data/lib/log_sense/apache_log_parser.rb +0 -100
- data/lib/log_sense/options_checker.rb +0 -24
- data/lib/log_sense/options_parser.rb +0 -147
- data/lib/log_sense/rails_log_parser.rb +0 -313
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 40a7ce48ddf73fc434bd0df0265e5d16d39e5034b62714f0324ea2a8b56950ca
|
|
4
|
+
data.tar.gz: c6e4ce814d0276f3a2c2aeaa20051772695c8bf8c8f53cbe10440415bb301dec
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3b31c02a784d0e3059500f774b090fd40e099d1facd2382195096194ccdecebb5ce601b26b6c04874cc7446e6e625e91be15966d425fc050f3b0be50c67a6985
|
|
7
|
+
data.tar.gz: 9b79e356ebcaa21b207c24c9ba4c78e39c6865deb4925413aca51b0ce1b705918ca30b61aa205be447547ac428e038239807f82046a9842f7a9d7753037c5fa5
|
data/CHANGELOG.org
CHANGED
|
@@ -2,6 +2,23 @@
|
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
|
3
3
|
#+STARTUP: showall
|
|
4
4
|
|
|
5
|
+
* 1.8.0
|
|
6
|
+
|
|
7
|
+
- Various improvements to the Rails reports
|
|
8
|
+
- Support for BrowserInfo data
|
|
9
|
+
|
|
10
|
+
* 1.7.0
|
|
11
|
+
|
|
12
|
+
- [User] Fixes a bug with the geolocator
|
|
13
|
+
- [User] Fixes a bug causing a crash when no country was found by the geolocator
|
|
14
|
+
- [User] Fixes bugs related to corner cases (empty logs, wrong parser for log,
|
|
15
|
+
empty geolocation data)
|
|
16
|
+
- [User] Updated DB-IP country file to Jun 2024 version.
|
|
17
|
+
- [User] Refreshed the style a bit, removed Fira Sans and updated versions of
|
|
18
|
+
CSS and JS frameworks
|
|
19
|
+
- [Code] Move options and some code in their own dir
|
|
20
|
+
- [Code] Add rendering view parsing (useful in development; no views yet)
|
|
21
|
+
|
|
5
22
|
* 1.6.1
|
|
6
23
|
|
|
7
24
|
- Country DB now stores country name.
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
log_sense (1.
|
|
4
|
+
log_sense (1.7.0)
|
|
5
5
|
browser
|
|
6
6
|
ipaddr
|
|
7
7
|
iso_country_codes
|
|
@@ -12,24 +12,30 @@ GEM
|
|
|
12
12
|
remote: https://rubygems.org/
|
|
13
13
|
specs:
|
|
14
14
|
browser (5.3.1)
|
|
15
|
-
debug (1.
|
|
16
|
-
irb (
|
|
17
|
-
reline (>= 0.3.
|
|
18
|
-
io-console (0.
|
|
19
|
-
ipaddr (1.2.
|
|
20
|
-
irb (1.
|
|
21
|
-
|
|
15
|
+
debug (1.9.2)
|
|
16
|
+
irb (~> 1.10)
|
|
17
|
+
reline (>= 0.3.8)
|
|
18
|
+
io-console (0.7.2)
|
|
19
|
+
ipaddr (1.2.6)
|
|
20
|
+
irb (1.13.1)
|
|
21
|
+
rdoc (>= 4.0.0)
|
|
22
|
+
reline (>= 0.4.2)
|
|
22
23
|
iso_country_codes (0.7.8)
|
|
23
|
-
mini_portile2 (2.8.
|
|
24
|
-
minitest (5.
|
|
24
|
+
mini_portile2 (2.8.7)
|
|
25
|
+
minitest (5.23.1)
|
|
26
|
+
psych (5.1.2)
|
|
27
|
+
stringio
|
|
25
28
|
rake (12.3.3)
|
|
26
|
-
|
|
29
|
+
rdoc (6.7.0)
|
|
30
|
+
psych (>= 4.0.0)
|
|
31
|
+
reline (0.5.8)
|
|
27
32
|
io-console (~> 0.5)
|
|
28
|
-
sqlite3 (
|
|
33
|
+
sqlite3 (2.0.2)
|
|
29
34
|
mini_portile2 (~> 2.8.0)
|
|
35
|
+
stringio (3.1.0)
|
|
30
36
|
terminal-table (3.0.2)
|
|
31
37
|
unicode-display_width (>= 1.1.1, < 3)
|
|
32
|
-
unicode-display_width (2.
|
|
38
|
+
unicode-display_width (2.5.0)
|
|
33
39
|
|
|
34
40
|
PLATFORMS
|
|
35
41
|
ruby
|
|
@@ -41,4 +47,4 @@ DEPENDENCIES
|
|
|
41
47
|
rake (~> 12.0)
|
|
42
48
|
|
|
43
49
|
BUNDLED WITH
|
|
44
|
-
2.
|
|
50
|
+
2.5.3
|
data/README.org
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
#+TITLE:
|
|
1
|
+
#+TITLE: LogSense Readme - Monitor your Rails app easy and fast
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
|
3
3
|
#+STARTUP: showall
|
|
4
4
|
|
|
5
5
|
* Introduction
|
|
6
6
|
|
|
7
|
-
LogSense generates reports and statistics from Apache and Ruby on
|
|
8
|
-
|
|
7
|
+
LogSense generates reports and statistics from Apache and Ruby on Rails log
|
|
8
|
+
files. All the statistics you need to monitor your application, its
|
|
9
|
+
performance, and how users access your app. Since it collects data from logs,
|
|
10
|
+
there is no need for cookies or other tracking technologies.
|
|
11
|
+
|
|
12
|
+
LogSense is written in Ruby, it runs from the command line, it is
|
|
9
13
|
fast, and it can be installed on any system with a relatively recent
|
|
10
14
|
version of Ruby. We tested on Ruby 2.6.9, Ruby 3.0.x and later.
|
|
11
15
|
|
|
@@ -22,6 +26,11 @@ When generating reports, LogSense reports the following data:
|
|
|
22
26
|
- IP Country location, thanks to the DB-IP lite country DB
|
|
23
27
|
- Streaks: resources accessed by a given IP over time
|
|
24
28
|
- Performance of Rails requests
|
|
29
|
+
- Rails Fatal Errors (with reference to the logs)
|
|
30
|
+
|
|
31
|
+
LogSense also parses the data generated by BrowserInfo, providing additional
|
|
32
|
+
information for Rails apps, including devices and platforms and number of
|
|
33
|
+
accesses to methods by device type.
|
|
25
34
|
|
|
26
35
|
A special output format =ufw= generates rules for the [[https://launchpad.net/ufw][Uncomplicated
|
|
27
36
|
Firewall]] to blacklist IPs requesting URLs matching a specific pattern.
|
|
@@ -31,16 +40,16 @@ distinguish traffic generated by self polls and crawlers.
|
|
|
31
40
|
|
|
32
41
|
LogSense generates HTML, txt, ufw, and SQLite outputs.
|
|
33
42
|
|
|
34
|
-
**
|
|
43
|
+
** Rails Report Structure
|
|
35
44
|
|
|
36
45
|
#+ATTR_HTML: :width 80%
|
|
37
|
-
[[file:./screenshots/
|
|
46
|
+
[[file:./screenshots/rails-screenshot.png]]
|
|
38
47
|
|
|
39
48
|
|
|
40
|
-
**
|
|
49
|
+
** Apache Report Structure
|
|
41
50
|
|
|
42
51
|
#+ATTR_HTML: :width 80%
|
|
43
|
-
[[file:./screenshots/
|
|
52
|
+
[[file:./screenshots/apache-screenshot.png]]
|
|
44
53
|
|
|
45
54
|
|
|
46
55
|
** UFW Report
|
|
@@ -78,19 +87,16 @@ opened or code executed.
|
|
|
78
87
|
|
|
79
88
|
* Motivation
|
|
80
89
|
|
|
81
|
-
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]]
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
LogSense is stored on your computer and owned by you (like it should
|
|
85
|
-
be)[fn:1].
|
|
86
|
-
|
|
87
|
-
LogSense is also inspired by *static websites generators*: statistics
|
|
88
|
-
are generated from the command line and accessed as static HTML files.
|
|
89
|
-
LogSense thus significantly reduces the attack surface of your
|
|
90
|
-
web server and installation headaches. We have, for instance, a cron
|
|
91
|
-
job running on our servers, generating statistics at night. The
|
|
92
|
-
generated files are then made available on a private area on the web.
|
|
90
|
+
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] and [[https://umami.is/][Umami]], focusing on
|
|
91
|
+
*privacy*, *data-ownership*, and *simplicity*: no need to install JavaScript
|
|
92
|
+
snippets, no tracking cookies, just plain and simple log analysis.
|
|
93
93
|
|
|
94
|
+
LogSense is also inspired by *static website generators*: statistics are
|
|
95
|
+
generated from the command line and accessed as static HTML files. This
|
|
96
|
+
significantly reduces the attack surface of your web server and installation
|
|
97
|
+
headaches. We have, for instance, a cron job running on our servers, generating
|
|
98
|
+
statistics at night. The generated files are then made available on a private
|
|
99
|
+
area on the web.
|
|
94
100
|
|
|
95
101
|
* Installation
|
|
96
102
|
|
|
@@ -98,7 +104,6 @@ generated files are then made available on a private area on the web.
|
|
|
98
104
|
gem install log_sense
|
|
99
105
|
#+end_src
|
|
100
106
|
|
|
101
|
-
|
|
102
107
|
* Usage
|
|
103
108
|
|
|
104
109
|
#+begin_src bash :results raw output :wrap example
|
|
@@ -108,25 +113,25 @@ generated files are then made available on a private area on the web.
|
|
|
108
113
|
#+RESULTS:
|
|
109
114
|
#+begin_example
|
|
110
115
|
Usage: log_sense [options] [logfile ...]
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
This is version 1.
|
|
116
|
+
--title=TITLE Title to use in the report
|
|
117
|
+
-f, --input-format=FORMAT Log format (stored in log or sqlite3): rails or apache (DEFAULT: apache)
|
|
118
|
+
-i, --input-files=file,file, Input file(s), log file or sqlite3 (can also be passed as arguments)
|
|
119
|
+
-t, --output-format=FORMAT Output format: html, txt, sqlite, ufw (DEFAULT: html)
|
|
120
|
+
-o, --output-file=OUTPUT_FILE Output file. (DEFAULT: STDOUT)
|
|
121
|
+
-b, --begin=DATE Consider only entries after or on DATE
|
|
122
|
+
-e, --end=DATE Consider only entries before or on DATE
|
|
123
|
+
-l, --limit=N Limit to the N most requested resources (DEFAULT: 100)
|
|
124
|
+
-w, --width=WIDTH Maximum width of long columns in textual reports
|
|
125
|
+
-r, --rows=ROWS Maximum number of rows for columns with multiple entries in textual reports
|
|
126
|
+
-p, --pattern=PATTERN Pattern to use with ufw report to select IP to blacklist (DEFAULT: php)
|
|
127
|
+
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
|
128
|
+
--no-selfpoll Ignore self poll entries (requests from ::1; applies to Apache Logs) (DEFAULT: false)
|
|
129
|
+
--no-geo Do not geolocate entries (DEFAULT: true)
|
|
130
|
+
--verbose Inform about progress (output to STDERR) (DEFAULT: false)
|
|
131
|
+
-v, --version Prints version information
|
|
132
|
+
-h, --help Prints this help
|
|
133
|
+
|
|
134
|
+
This is version 1.8.0
|
|
130
135
|
|
|
131
136
|
Output formats:
|
|
132
137
|
|
|
@@ -210,8 +215,8 @@ Concerning the outputs:
|
|
|
210
215
|
|
|
211
216
|
* Known Bugs
|
|
212
217
|
|
|
213
|
-
|
|
214
|
-
|
|
218
|
+
We have been running LogSense for quite a few years with no particular issues.
|
|
219
|
+
There are no known bugs; there is an unknown number of unknown bugs.
|
|
215
220
|
|
|
216
221
|
* License
|
|
217
222
|
|
|
@@ -220,8 +225,3 @@ Source code distributed under the terms of the [[http://opensource.org/licenses/
|
|
|
220
225
|
Geolocation is made possible by the DB-IP.com IP to City database,
|
|
221
226
|
released under a CC license.
|
|
222
227
|
|
|
223
|
-
[fn:1] There is a small catch: CSS and JavaScript for layout and plots
|
|
224
|
-
are downloaded from a CDN. Technically, thus, if you generate HTML
|
|
225
|
-
reports and open them, a request is performed and the CDN might keep a
|
|
226
|
-
track (see [[https://en.wikipedia.org/wiki/Content_delivery_network#Security_and_privacy][CDN Security and Privacy on Wikipedia]] for more details).
|
|
227
|
-
Textual reports don't have this issue.
|
data/Rakefile
CHANGED
|
@@ -8,9 +8,9 @@ end
|
|
|
8
8
|
|
|
9
9
|
require_relative './lib/log_sense/ip_locator.rb'
|
|
10
10
|
|
|
11
|
-
desc "Convert Geolocation DB to sqlite"
|
|
11
|
+
desc "Convert Geolocation DB to sqlite (arg YYYY_MM or filename)"
|
|
12
12
|
task :dbip, [:filename] do |tasks, args|
|
|
13
|
-
filename_or_yyyy_mm = args[:filename]
|
|
13
|
+
filename_or_yyyy_mm = args[:filename] || ""
|
|
14
14
|
|
|
15
15
|
filename = if /\d{4}-\d{2}/.match(filename_or_yyyy_mm)
|
|
16
16
|
"ip_locations/dbip-country-lite-#{filename_or_yyyy_mm}.csv"
|
|
@@ -18,13 +18,15 @@ task :dbip, [:filename] do |tasks, args|
|
|
|
18
18
|
filename_or_yyyy_mm
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
-
# if the filename has a .gz extension or a gzipped version
|
|
22
|
-
# exists, gunzip it
|
|
21
|
+
# if the filename passed as argument has a .gz extension or a gzipped version
|
|
22
|
+
# of the file passed as argument exists, gunzip it
|
|
23
23
|
if File.extname(filename) == ".gz" || File.exist?("#{filename}.gz")
|
|
24
24
|
system "gunzip #{filename}.gz"
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
if
|
|
27
|
+
if File.exist? filename
|
|
28
|
+
LogSense::IpLocator::dbip_to_sqlite filename
|
|
29
|
+
else
|
|
28
30
|
puts <<-EOS
|
|
29
31
|
Error. Could not find: #{filename}
|
|
30
32
|
|
|
@@ -37,13 +39,10 @@ I see the following files:
|
|
|
37
39
|
3. Relaunch with YYYY-MM (will build: dbip-country-lite-YYYY-MM.csv)
|
|
38
40
|
or with filename.
|
|
39
41
|
|
|
40
|
-
Remark. If the filename has the extension .gz or if the
|
|
41
|
-
|
|
42
|
-
exists, it is gunzipped first
|
|
42
|
+
Remark. If the filename has the extension .gz or if the filename does not exist,
|
|
43
|
+
but a file with the same name and .gz extension exists, it is gunzipped first
|
|
43
44
|
EOS
|
|
44
45
|
|
|
45
46
|
exit
|
|
46
|
-
else
|
|
47
|
-
LogSense::IpLocator::dbip_to_sqlite filename
|
|
48
47
|
end
|
|
49
48
|
end
|
data/exe/log_sense
CHANGED
|
@@ -9,7 +9,7 @@ require "sqlite3"
|
|
|
9
9
|
|
|
10
10
|
# this better be here... OptionsParser consumes ARGV
|
|
11
11
|
@command_line = ARGV.join(" ")
|
|
12
|
-
@options = LogSense::
|
|
12
|
+
@options = LogSense::Options::Parser.parse ARGV
|
|
13
13
|
@input_filenames = @options[:input_filenames] + ARGV
|
|
14
14
|
@output_filename = @options[:output_filename]
|
|
15
15
|
|
|
@@ -20,33 +20,36 @@ require "sqlite3"
|
|
|
20
20
|
#
|
|
21
21
|
# Check input files
|
|
22
22
|
#
|
|
23
|
-
@non_existing = @input_filenames.reject { |x| File.exist?(x) }
|
|
24
23
|
|
|
24
|
+
@non_existing = @input_filenames.reject { |x| File.exist?(x) }
|
|
25
25
|
if @non_existing.any?
|
|
26
26
|
warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
|
|
27
27
|
exit 1
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
if @input_filenames.size > 0 &&
|
|
34
|
-
File.extname(@input_filenames.first) == "sqlite3" &&
|
|
35
|
-
@input_filenames.size > 1
|
|
36
|
-
warn "Error: you can pass only one sqlite3 file as input"
|
|
30
|
+
@sqlite3_files = @input_filenames.select { |x| File.extname(x).include?("sqlite") }
|
|
31
|
+
if @sqlite3_files.any? && @input_filenames.size != 1
|
|
32
|
+
warn "Error: when passing an SQLite3 DB, this has to be the only input file"
|
|
37
33
|
exit 1
|
|
38
34
|
end
|
|
39
35
|
|
|
36
|
+
#
|
|
37
|
+
# Check output files
|
|
38
|
+
#
|
|
39
|
+
|
|
40
|
+
# Nothing to be done, here, since we output to STDOUT if no output filename is
|
|
41
|
+
# specified
|
|
42
|
+
|
|
40
43
|
#
|
|
41
44
|
# Supported input/output chains
|
|
42
45
|
#
|
|
43
46
|
iformat = @options[:input_format]
|
|
44
47
|
oformat = @options[:output_format]
|
|
45
48
|
|
|
46
|
-
if !LogSense::
|
|
49
|
+
if !LogSense::Options::Checker.compatible?(iformat, oformat)
|
|
47
50
|
warn "Error: don't know how to make #{iformat} into #{oformat}."
|
|
48
51
|
warn "Possible transformation chains:"
|
|
49
|
-
warn LogSense::
|
|
52
|
+
warn LogSense::Options::Checker.chains_to_s
|
|
50
53
|
exit 1
|
|
51
54
|
end
|
|
52
55
|
|
|
@@ -56,8 +59,11 @@ end
|
|
|
56
59
|
|
|
57
60
|
@started_at = Time.now
|
|
58
61
|
|
|
59
|
-
|
|
60
|
-
|
|
62
|
+
#
|
|
63
|
+
# Input
|
|
64
|
+
#
|
|
65
|
+
|
|
66
|
+
if @input_filenames.size > 0 && File.extname(@input_filenames.first) == ".sqlite3"
|
|
61
67
|
warn "Reading SQLite3 DB ..." if @options[:verbose]
|
|
62
68
|
@db = SQLite3::Database.open @input_filenames.first
|
|
63
69
|
else
|
|
@@ -67,12 +73,19 @@ else
|
|
|
67
73
|
else
|
|
68
74
|
@input_filenames.map { |fname| File.open(fname, "r") }
|
|
69
75
|
end
|
|
70
|
-
|
|
76
|
+
|
|
77
|
+
class_name = "LogSense::#{@options[:input_format].capitalize}::LogParser"
|
|
71
78
|
parser_class = Object.const_get class_name
|
|
72
79
|
parser = parser_class.new
|
|
73
80
|
@db = parser.parse @input_files
|
|
74
81
|
end
|
|
75
82
|
|
|
83
|
+
#
|
|
84
|
+
# Output
|
|
85
|
+
#
|
|
86
|
+
|
|
87
|
+
# TODO this code could benefit from some classes abstracting the work a bit
|
|
88
|
+
|
|
76
89
|
if @options[:output_format] == "sqlite3"
|
|
77
90
|
warn "Saving SQLite3 DB ..." if @options[:verbose]
|
|
78
91
|
|
|
@@ -83,7 +96,7 @@ if @options[:output_format] == "sqlite3"
|
|
|
83
96
|
|
|
84
97
|
exit 0
|
|
85
98
|
elsif @options[:output_format] == "ufw"
|
|
86
|
-
pattern = @options[:pattern]
|
|
99
|
+
pattern = @options[:pattern]
|
|
87
100
|
|
|
88
101
|
if @options[:input_format] == "rails"
|
|
89
102
|
query = "select distinct event.ip,event.url
|
|
@@ -113,14 +126,18 @@ else
|
|
|
113
126
|
aggr = aggr_class.new(@db, @options)
|
|
114
127
|
@data = aggr.aggregate
|
|
115
128
|
|
|
116
|
-
if @options[:geolocation]
|
|
129
|
+
if @options[:geolocation] && @data[:ips].size != 0
|
|
117
130
|
warn "Geolocating ..." if @options[:verbose]
|
|
118
|
-
|
|
131
|
+
geolocated_data = LogSense::IpLocator.geolocate @data
|
|
119
132
|
|
|
120
133
|
warn "Grouping IPs by country ..." if @options[:verbose]
|
|
121
|
-
country_col =
|
|
122
|
-
@data[:countries] =
|
|
134
|
+
country_col = geolocated_data[0].size - 1
|
|
135
|
+
@data[:countries] = geolocated_data.group_by { |x| x[country_col] }
|
|
136
|
+
elsif @options[:geolocation] && @data[:ips].size == 0
|
|
137
|
+
warn "Skipping geolocation: no IP found" if @options[:verbose]
|
|
138
|
+
@data[:countries] = {}
|
|
123
139
|
else
|
|
140
|
+
warn "Skipping geolocation." if @options[:verbose]
|
|
124
141
|
@data[:countries] = {}
|
|
125
142
|
end
|
|
126
143
|
|
|
Binary file
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module LogSense
  module Apache
    # Parses one line of an Apache access log in "combined" format and
    # returns the MatchData holding its fields.
    #
    # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
    #
    # %h: IP (IPv4 dotted quad, "::1", or the literal "unknown")
    # %l: ident or -
    # %u: userid or -
    # %t: [10/Oct/2000:13:55:36 -0700]
    #     day = 2*digit
    #     month = 3*letter
    #     year = 4*digit
    #     hour = 2*digit
    #     minute = 2*digit
    #     second = 2*digit
    #     zone = (`+' | `-') 4*digit
    # %r: GET /apache_pb.gif HTTP/1.0
    # %>s: three digit status code
    # %b: response size in bytes or -
    # %{Referer}i: quoted referer
    # %{User-agent}i: quoted user agent string
    #
    # Example
    # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
    #
    # Named captures made available by the match: ip, ident, userid, date,
    # time, method, url, protocol, status, size, referer, user_agent.
    # method, url and protocol are nil when the request field is "-" or
    # does not have the "METHOD URL PROTOCOL" shape.
    #
    class LogLineParser
      DAY = /[0-9]{2}/
      MONTH = /[A-Za-z]{3}/
      YEAR = /[0-9]{4}/
      TIMEC = /[0-9]{2}/
      TIMEZONE = /(\+|-)[0-9]{4}/

      # The pattern is searched anywhere in the line, so "::1" is guarded by a
      # lookbehind: without it the tail of a longer IPv6 address (for instance
      # "2001:db8::1") would be reported as the loopback address.
      IP = /(?<ip>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|(?<![0-9A-Fa-f:])::1|unknown)/
      IDENT = /(?<ident>[^ ]+|-)/
      USERID = /(?<userid>[^ ]+|-)/

      TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/

      HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
      WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
      # "HEAD/robots.txt" is a malformed method seen in logs; the dot is
      # escaped so that it only matches a literal dot.
      OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots\.txt/
      METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
      PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
      URL = /(?<url>[^ ]+)/
      REFERER = /(?<referer>[^"]*)/
      RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
      SIZE = /(?<size>[0-9]+|-)/
      USER_AGENT = /(?<user_agent>[^"]*)/

      # Full line pattern, compiled once and shared by all instances.
      FORMAT = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/

      # Regexp used by #parse to recognize a log line.
      attr_reader :format

      def initialize
        @format = FORMAT
      end

      # Matches +line+ against the combined log format.
      #
      # Returns the MatchData with the named captures listed above.
      # Raises RuntimeError when the line does not match.
      def parse(line)
        @format.match(line) ||
          raise("Apache LogLine Parser Error: Could not parse #{line}")
      end
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require "sqlite3"
|
|
2
|
+
require "browser"
|
|
3
|
+
require_relative "log_line_parser"
|
|
4
|
+
|
|
5
|
+
module LogSense
  module Apache
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    class LogParser
      # Columns of the LogLine table filled for every parsed line, in the
      # order in which values are bound by #insert_line.
      COLUMNS = %w[
        datetime ip user unique_visitor method path extension status size
        referer user_agent bot browser browser_version platform
        platform_version source_file line_number
      ].freeze

      # Reads every stream line by line and stores the parsed entries in the
      # LogLine table of a fresh in-memory SQLite3 database.
      #
      # streams - enumerable of readable IO-like objects ($stdin or files).
      # options - currently unused; kept so that existing callers keep working.
      #
      # Lines which cannot be parsed or stored are reported on STDERR and
      # skipped. Returns the SQLite3::Database.
      def parse(streams, options = {})
        db = SQLite3::Database.new ":memory:"

        db.execute "CREATE TABLE IF NOT EXISTS LogLine(
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          datetime TEXT,
          ip TEXT,
          user TEXT,
          unique_visitor TEXT,
          method TEXT,
          path TEXT,
          extension TEXT,
          status TEXT,
          size INTEGER,
          referer TEXT,
          user_agent TEXT,
          bot INTEGER,
          browser TEXT,
          browser_version TEXT,
          platform TEXT,
          platform_version TEXT,
          source_file TEXT,
          line_number INTEGER
        )"

        # one placeholder per column, so the two lists cannot get out of sync
        placeholders = COLUMNS.map { "?" }.join(", ")
        ins = db.prepare(
          "insert into LogLine (#{COLUMNS.join(', ')}) values (#{placeholders})"
        )

        parser = LogLineParser.new

        begin
          streams.each do |stream|
            source = source_name(stream)

            # each_line streams the input instead of loading the whole log
            # in memory
            # NOTE(review): line_number is zero-based (first line is 0), as
            # in the previous implementation -- confirm this is what the
            # reports expect
            stream.each_line.with_index do |line, line_number|
              begin
                insert_line(ins, parser, line, source, line_number)
              rescue StandardError => e
                # best effort: report the offending line and keep going
                warn e.message
              end
            end
          end
        ensure
          # release the prepared statement; the database stays open
          ins.close
        end

        db
      end

      private

      # Name stored in the source_file column for stream.
      def source_name(stream)
        return "stdin" if stream == $stdin

        stream.respond_to?(:path) ? stream.path : stream.inspect
      end

      # Parses line and inserts the resulting row using the prepared
      # statement ins. Raises if the line cannot be parsed or stored.
      def insert_line(ins, parser, line, source, line_number)
        hash = parser.parse line
        ua = Browser.new(hash[:user_agent], accept_language: 'en-us')

        ins.execute(
          DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
          hash[:ip],
          hash[:userid],
          unique_visitor_id(hash),
          hash[:method],
          hash[:url],
          (hash[:url] ? File.extname(hash[:url]) : ''),
          hash[:status],
          hash[:size].to_i,
          hash[:referer],
          hash[:user_agent],
          ua.bot? ? 1 : 0,
          (ua.name || ''),
          (ua.version || ''),
          (ua.platform.name || ''),
          (ua.platform.version || ''),
          source,
          line_number
        )
      end

      # A visitor is identified by day, IP, and user agent.
      def unique_visitor_id(hash)
        "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
      end
    end
  end
end
|