log_sense 1.5.1 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.org +51 -1
  3. data/Gemfile.lock +15 -6
  4. data/LICENSE.txt +9 -2
  5. data/README.org +113 -36
  6. data/Rakefile +6 -6
  7. data/exe/log_sense +110 -39
  8. data/ip_locations/dbip-country-lite.sqlite3 +0 -0
  9. data/lib/log_sense/aggregator.rb +191 -0
  10. data/lib/log_sense/apache_aggregator.rb +122 -0
  11. data/lib/log_sense/apache_log_line_parser.rb +23 -21
  12. data/lib/log_sense/apache_log_parser.rb +15 -12
  13. data/lib/log_sense/apache_report_shaper.rb +309 -0
  14. data/lib/log_sense/emitter.rb +80 -506
  15. data/lib/log_sense/ip_locator.rb +24 -12
  16. data/lib/log_sense/options_checker.rb +24 -0
  17. data/lib/log_sense/options_parser.rb +84 -50
  18. data/lib/log_sense/rails_aggregator.rb +69 -0
  19. data/lib/log_sense/rails_log_parser.rb +82 -68
  20. data/lib/log_sense/rails_report_shaper.rb +183 -0
  21. data/lib/log_sense/report_shaper.rb +105 -0
  22. data/lib/log_sense/templates/_cdn_links.html.erb +11 -0
  23. data/lib/log_sense/templates/_command_invocation.html.erb +4 -0
  24. data/lib/log_sense/templates/_log_structure.html.erb +13 -4
  25. data/lib/log_sense/templates/_output_table.html.erb +54 -2
  26. data/lib/log_sense/templates/_output_table.txt.erb +3 -1
  27. data/lib/log_sense/templates/_rails.css.erb +7 -0
  28. data/lib/log_sense/templates/_report_data.html.erb +4 -4
  29. data/lib/log_sense/templates/_summary.html.erb +14 -5
  30. data/lib/log_sense/templates/_summary.txt.erb +2 -2
  31. data/lib/log_sense/templates/{rails.html.erb → report_html.erb} +19 -37
  32. data/lib/log_sense/templates/{apache.txt.erb → report_txt.erb} +1 -1
  33. data/lib/log_sense/version.rb +1 -1
  34. data/lib/log_sense.rb +19 -9
  35. data/log_sense.gemspec +21 -21
  36. data/{apache-screenshot.png → screenshots/apache-screenshot.png} +0 -0
  37. data/screenshots/rails-screenshot.png +0 -0
  38. metadata +21 -16
  39. data/lib/log_sense/apache_data_cruncher.rb +0 -141
  40. data/lib/log_sense/rails_data_cruncher.rb +0 -141
  41. data/lib/log_sense/templates/apache.html.erb +0 -115
  42. data/lib/log_sense/templates/rails.txt.erb +0 -22
  43. data/sample_logs/empty_log.log +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f20a0d2041df1f2414bd0015fff27764c28a89dfd2d45fdb275141638bc5784
4
- data.tar.gz: 14426b7383fe9b0077c2852f177545384a369f35489aa5d95372c20cbe19732d
3
+ metadata.gz: 7d269dedfbb6ec6eae3a77491cc5ec7ca6241f388f2658964541cdd3983b8298
4
+ data.tar.gz: 6f24d23c8d06430b3605aad90522e08818cd18fd6c71c5fe90823cdc9483c81e
5
5
  SHA512:
6
- metadata.gz: 9defcff35a5b3802d7d1a7db596a30aa01e28d3b9d784619de5f61713a587e427ccce76fcfcd478e946a0822af8d0822b23bad6392c2a71690ccd07250d5cfb7
7
- data.tar.gz: 8b9143feb4f7508de9b7f60eddacafc4713c064f8bda21ed9ae92de364aa5e94fed36ff6d12f2b2aa108337d92c25868bce21d172397f48ff3c41ed93dc9a2b5
6
+ metadata.gz: a63f715b281101a6f61029da3e2bcf5db4d47a537af562507b0184d6c6755eed436afed729c31cc95255bf064cbe9f487917c96d78199bbee25e6d4189469951
7
+ data.tar.gz: 77c62a24c3c81067dd0288ad606b732e0f112d0d1710b326be9e894aa72a93db4cb0a188c93b54ada728c2eeb0cf7378fb7033e13982c9a0932414c8455d2703
data/CHANGELOG.org CHANGED
@@ -2,6 +2,56 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.6.0
6
+
7
+ - [User] New output format =ufw= generates directives to blacklist IPs
8
+ requesting URLs matching a pattern. For users of the Uncomplicated
9
+ Firewall.
10
+ - [User] new option =--no-geo= skips geolocation, which is terribly
11
+ costly in the current implementation.
12
+ - [User] Updated DB-IP country file to Dec 2022 version.
13
+ - [User] Changed name of SQLite output format to sqlite3
14
+ - [User] It is now possible to start analysis from a sqlite3 DB
15
+ generated by log_sense, breaking parsing and generation in two
16
+ steps.
17
+ - [User] Check for correctness of I/O formats before launching
18
+ analysis
19
+ - [User] Streak report has been renamed Session. Limited the number
20
+ of URLs shown in each session, to avoid buffer?/memory overflows
21
+ when an IP requests a massive amount of URLs.
22
+ - [User] Added an IP-per-hour visits report.
23
+ - [Code] A rather extensive refactoring of the source code to
24
+ remove code duplications and improve code structure.
25
+ - [Code] Rubocop-ped various files
26
+ - [Code] Added text renderer to DataTable, which sanitizes input and
27
+ further reduces risks of XSS and log poisoning attacks
28
+ - [Code] CDN links have been ported into the Emitter module and used
29
+ in the Embedded Ruby Templates (erbs). This simplifies version
30
+ updates of Javascript libraries used in reports.
31
+
32
+ * 1.5.2
33
+
34
+ - [User] Updated DB-IP country file.
35
+ - [User] Added reports "Missed Pages by IP" and "Missed Resources by
36
+ IP". It can help pinpoint attack sources.
37
+ - [User] Added report "Combined Platform", which puts together
38
+ Browser, OS, and IP.
39
+ - [User] Summary now shows total size transferred.
40
+ - [User] Added link to DB-IP page for IPs in some tables.
41
+ - [User] Added count of IPs by Country.
42
+ - [User] Improved textual report: values in cells holding multiple
43
+ values (e.g. IPs) are now shown in distinct lines in the cell. A new
44
+ option -r limits the number of lines shown per cell.
45
+ - [Default] The number of rows initially shown in HTML reports is now 25.
46
+ - [Default] Default for number of entries in textual report is now
47
+ 100 (used to be 900).
48
+ - [Fixed] The size column in HTML reports is now sorted numerically.
49
+ - [Code] Improved performance of DataTable rendering, using the
50
+ dataRender flag.
51
+ - [Code] Use trim_mode in ERB to avoid empty lines in HTML output.
52
+ - [Code] Moved to the debug gem.
53
+ - [Gem] Updated email and author's name.
54
+
5
55
  * 1.5.1
6
56
 
7
57
  - [User] Option --input-files allows to specify input files
@@ -16,7 +66,7 @@
16
66
  - [User] Present Unique Visits / day as integer
17
67
  - [User] Added Country and Streaks report for rails
18
68
  - [User] Changed Streak report in Apache
19
- - [Gem] Updated DBIP
69
+ - [Gem] Updated DB-IP
20
70
  - [Gem] Updated Bundle
21
71
  - [Code] Refactored all reports, so that they are specified
22
72
  in the same way
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- log_sense (1.4.2)
4
+ log_sense (1.5.3)
5
5
  browser
6
6
  ipaddr
7
7
  iso_country_codes
@@ -12,21 +12,30 @@ GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
14
  browser (5.3.1)
15
- byebug (11.1.3)
16
- ipaddr (1.2.4)
15
+ debug (1.6.2)
16
+ irb (>= 1.3.6)
17
+ reline (>= 0.3.1)
18
+ io-console (0.5.11)
19
+ ipaddr (1.2.5)
20
+ irb (1.4.1)
21
+ reline (>= 0.3.0)
17
22
  iso_country_codes (0.7.8)
23
+ mini_portile2 (2.8.0)
18
24
  minitest (5.15.0)
19
25
  rake (12.3.3)
20
- sqlite3 (1.4.2)
26
+ reline (0.3.1)
27
+ io-console (~> 0.5)
28
+ sqlite3 (1.5.4)
29
+ mini_portile2 (~> 2.8.0)
21
30
  terminal-table (3.0.2)
22
31
  unicode-display_width (>= 1.1.1, < 3)
23
- unicode-display_width (2.1.0)
32
+ unicode-display_width (2.3.0)
24
33
 
25
34
  PLATFORMS
26
35
  ruby
27
36
 
28
37
  DEPENDENCIES
29
- byebug
38
+ debug
30
39
  log_sense!
31
40
  minitest
32
41
  rake (~> 12.0)
data/LICENSE.txt CHANGED
@@ -1,5 +1,4 @@
1
- The MIT License (MIT)
2
-
1
+ The source code is distributed under the terms of the MIT License (MIT)
3
2
  Copyright (c) 2021 Shair.Tech
4
3
 
5
4
  Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -19,3 +18,11 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
18
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
19
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
20
  THE SOFTWARE.
21
+
22
+ ==============================================================================
23
+ LogSense uses data from The free DB-IP Lite database for geolocation purposes.
24
+
25
+ The DB-IP Lite Database is licensed under a Creative Commons Attribution 4.0
26
+ International License.
27
+
28
+ For more information: https://db-ip.com
data/README.org CHANGED
@@ -9,29 +9,62 @@ Rails logs. Written in Ruby, it runs from the command line, it is
9
9
  fast, and it can be installed on any system with a relatively recent
10
10
  version of Ruby. We tested on Ruby 2.6.9, Ruby 3.0.x and later.
11
11
 
12
- LogSense reports the following data:
12
+ When generating reports, LogSense reports the following data:
13
13
 
14
14
  - Visitors, hits, unique visitors, bandwidth used
15
15
  - Most accessed HTML pages
16
16
  - Most accessed resources
17
+ - Missed resources (also by IP) which helps highlight
18
+ potential attacks
17
19
  - Response statuses
18
20
  - Referers
19
21
  - OS, browsers, and devices
20
- - IP Country location, thanks to the DPIP lite country DB
22
+ - IP Country location, thanks to the DB-IP lite country DB
21
23
  - Streaks: resources accessed by a given IP over time
22
24
  - Performance of Rails requests
25
+
26
+ A special output format =ufw= generates rules for the [[https://launchpad.net/ufw][Uncomplicated
27
+ Firewall]] to blacklist IPs requesting URLs matching a specific pattern.
23
28
 
24
29
  Filters from the command line allow to analyze specific periods and
25
30
  distinguish traffic generated by self polls and crawlers.
26
31
 
27
- LogSense generates HTML, txt, and SQLite outputs.
32
+ LogSense generates HTML, txt, ufw, and SQLite outputs.
28
33
 
29
- And, of course, the compulsory screenshot:
34
+ ** Apache Report Structure
30
35
 
31
36
  #+ATTR_HTML: :width 80%
32
- [[file:./apache-screenshot.png]]
37
+ [[file:./screenshots/apache-screenshot.png]]
38
+
39
+
40
+ ** Rails Report Structure
41
+
42
+ #+ATTR_HTML: :width 80%
43
+ [[file:./screenshots/rails-screenshot.png]]
44
+
45
+
46
+ ** UFW Report
33
47
 
48
+ The output format =ufw= generates directives for Uncomplicated
49
+ Firewall blacklisting IPs requesting URLs matching a given pattern.
34
50
 
51
+ We use it to blacklist IPs requesting WordPress login pages on our
52
+ websites... since we don't use WordPress for our websites.
53
+
54
+ *Example*
55
+
56
+ #+begin_src
57
+ $ log_sense -f apache -t ufw -i apache.log
58
+ # /users/sign_in/xmlrpc.php?rsd
59
+ ufw deny from 20.212.3.206
60
+
61
+ # /wp-login.php /wordpress/wp-login.php /blog/wp-login.php /wp/wp-login.php
62
+ ufw deny from 185.255.134.18
63
+
64
+ ...
65
+ #+end_src
66
+
67
+
35
68
  * An important word of warning
36
69
 
37
70
  [[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with invalidated
@@ -46,9 +79,10 @@ opened or code executed.
46
79
  * Motivation
47
80
 
48
81
  LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
49
- strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]], focusing on
50
- *privacy* and *data-ownership*: the data generated by LogSense is
51
- stored on your computer and owned by you (like it should be)[fn:1].
82
+ strongly inspired the development of Log Sense) and [[https://umami.is/][Umami]], both
83
+ focusing on *privacy* and *data-ownership*: the data generated by
84
+ LogSense is stored on your computer and owned by you (like it should
85
+ be)[fn:1].
52
86
 
53
87
  LogSense is also inspired by *static websites generators*: statistics
54
88
  are generated from the command line and accessed as static HTML files.
@@ -74,41 +108,84 @@ generated files are then made available on a private area on the web.
74
108
  #+RESULTS:
75
109
  #+begin_example
76
110
  Usage: log_sense [options] [logfile ...]
77
- --title=TITLE Title to use in the report
78
- -f, --input-format=FORMAT Input format (either rails or apache)
79
- -i, --input-files=file,file, Input files (can also be passed directly)
80
- -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
81
- -o, --output-file=OUTPUT_FILE Output file
82
- -b, --begin=DATE Consider entries after or on DATE
83
- -e, --end=DATE Consider entries before or on DATE
84
- -l, --limit=N Limit to the N most requested resources (defaults to 900)
85
- -w, --width=WIDTH Maximum width of URL and description columns in text reports
86
- -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
87
- -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
88
- --verbose Inform about progress (prints to STDERR)
89
- -v, --version Prints version information
90
- -h, --help Prints this help
91
-
92
- This is version 1.5.1
93
-
94
- Output formats
95
- rails parsing can produce the following outputs:
96
- - sqlite
97
- - txt
98
- - html
99
- apache parsing can produce the following outputs:
100
- - sqlite
101
- - txt
102
- - html
111
+ --title=TITLE Title to use in the report
112
+ -f, --input-format=FORMAT Input format (either rails or apache)
113
+ -i, --input-files=file,file, Input files (can also be passed directly)
114
+ -t, --output-format=FORMAT Output format: html, org, txt, sqlite.
115
+ -o, --output-file=OUTPUT_FILE Output file
116
+ -b, --begin=DATE Consider entries after or on DATE
117
+ -e, --end=DATE Consider entries before or on DATE
118
+ -l, --limit=N Limit to the N most requested resources (defaults to 100)
119
+ -w, --width=WIDTH Maximum width of long columns in textual reports
120
+ -r, --rows=ROWS Maximum number of rows for columns with multiple entries in textual reports
121
+ -p, --pattern=PATTERN Pattern to use with ufw report to decide IP to blacklist
122
+ -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
123
+ --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
124
+ -n, --no-geog Do not geolocate entries
125
+ --verbose Inform about progress (output to STDERR)
126
+ -v, --version Prints version information
127
+ -h, --help Prints this help
128
+
129
+ This is version 1.6.0
130
+
131
+ Output formats:
132
+
133
+ - rails: txt, html, sqlite3, ufw
134
+ - apache: txt, html, sqlite3, ufw
103
135
  #+end_example
104
136
 
105
137
  Examples:
106
138
 
107
139
  #+begin_example sh
108
140
  log_sense -f apache -i access.log -t txt > access-data.txt
109
- log_sense -f rails -i production.log -t html -o performance.txt
141
+ log_sense -f rails -i production.log -t html -o performance.html
110
142
  #+end_example
111
143
 
144
+ * Code Structure
145
+
146
+ The code implements a pipeline, with the following steps:
147
+
148
+ 1. *Parser:* parses a log to a SQLite3 database. The database
149
+ contains a table with a list of events, and, in the case of Rails
150
+ report, a table with the errors.
151
+ 2. *Aggregator:* takes as input a SQLite DB and aggregates data,
152
+ typically performing "group by", which are simpler to generate in
153
+ Ruby, rather than in SQL. The module outputs a Hash, with
154
+ different reporting data.
155
+ 3. *GeoLocator:* adds country information to all the reporting data
156
+ which has an IP as one of the fields.
157
+ 4. *Shaper:* makes (geolocated) aggregated data (e.g. Hashes and
158
+ such), into Array of Arrays, simplifying the structure of the code
159
+ building the reports.
160
+ 5. *Emitter* generates reports from shaped data using ERB.
161
+
162
+ The architecture and the structure of the code is far from being nice,
163
+ for historical reasons and for a bunch of small differences existing
164
+ between the input and the outputs to be generated. This usually ends
165
+ up with modifications to the code that have to be replicated in
166
+ different parts of the code and in interferences.
167
+
168
+ Among the points I would like to address:
169
+
170
+ - The execution pipeline in the main script has a few exceptions to
171
+ manage SQLite reading/dumping and ufw report. A linear structure
172
+ would be a lot nicer.
173
+ - Two different classes are defined for steps 1, 2, and 4, to manage,
174
+ respectively, Apache and Rails logs. These classes inherit from a
175
+ common ancestor (e.g. ApacheParser and RailsParser both inherit from
176
+ Parser), but there is still too little code shared. A nicer
177
+ approach would be that of identifying a common DB structure and
178
+ unify the pipeline up to (or including) the generation of
179
+ reports. There are a bunch of small different things to highlight in
180
+ reports, which still make this difficult. For instance, the country
181
+ report for Apache reports size of TX data, which is not available
182
+ for Rails reports.
183
+ - Geolocation could become a lot more efficient if performed in
184
+ SQLite, rather than in Ruby
185
+ - The distinction between Aggregation, Shaping, and Emission is too
186
+ fine-grained and it would be nice to be able to cleanly remove one
187
+ of the steps.
188
+
112
189
 
113
190
  * Change Log
114
191
 
@@ -138,7 +215,7 @@ the known bugs.)
138
215
 
139
216
  * License
140
217
 
141
- Distributed under the terms of the [[http://opensource.org/licenses/MIT][MIT License]].
218
+ Source code distributed under the terms of the [[http://opensource.org/licenses/MIT][MIT License]].
142
219
 
143
220
  Geolocation is made possible by the DB-IP.com IP to City database,
144
221
  released under a CC license.
data/Rakefile CHANGED
@@ -9,18 +9,18 @@ end
9
9
  require_relative './lib/log_sense/ip_locator.rb'
10
10
 
11
11
  desc "Convert Geolocation DB to sqlite"
12
- task :dbip_to_sqlite3, [:year_month] do |tasks, args|
13
- filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
12
+ task :dbip, [:filename] do |tasks, args|
13
+ filename = args[:filename]
14
14
 
15
15
  if !File.exist? filename
16
16
  puts "Error. Could not find: #{filename}"
17
17
  puts
18
18
  puts 'I see the following files:'
19
19
  puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
20
- puts ''
21
- puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
22
- puts '2. Save downloaded file to ip_locations/'
23
- puts '3. Relaunch with YYYY-MM'
20
+ puts
21
+ puts "1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite"
22
+ puts "2. Save downloaded file to ip_locations/"
23
+ puts "3. Relaunch with YYYY-MM"
24
24
 
25
25
  exit
26
26
  else
data/exe/log_sense CHANGED
@@ -1,82 +1,153 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'log_sense.rb'
3
+ require "log_sense"
4
+ require "sqlite3"
4
5
 
5
6
  #
6
7
  # Parse Command Line Arguments
7
8
  #
8
9
 
9
10
  # this better be here... OptionsParser consumes ARGV
10
- @command_line = ARGV.join(' ')
11
- @options = LogSense::OptionsParser.parse ARGV
12
- @output_file = @options[:output_file]
11
+ @command_line = ARGV.join(" ")
12
+ @options = LogSense::OptionsParser.parse ARGV
13
+ @input_filenames = @options[:input_filenames] + ARGV
14
+ @output_filename = @options[:output_filename]
13
15
 
14
16
  #
15
- # Input files can be gotten from an option and from what remains in
16
- # ARGV
17
+ # Check correctness of input data.
18
+ #
19
+
20
+ #
21
+ # Check input files
17
22
  #
18
- @input_filenames = @options[:input_filenames] + ARGV
19
23
  @non_existing = @input_filenames.reject { |x| File.exist?(x) }
20
24
 
21
- unless @non_existing.empty?
22
- $stderr.puts "Error: input file(s) '#{@non_existing.join(', ')}' do not exist"
25
+ if @non_existing.any?
26
+ warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
27
+ exit 1
28
+ end
29
+
30
+ #
31
+ # Special condition: sqlite3 requires a single file as input
32
+ #
33
+ if @input_filenames.size > 0 &&
34
+ File.extname(@input_filenames.first) == "sqlite3" &&
35
+ @input_filenames.size > 1
36
+ warn "Error: you can pass only one sqlite3 file as input"
37
+ exit 1
38
+ end
39
+
40
+ #
41
+ # Supported input/output chains
42
+ #
43
+ iformat = @options[:input_format]
44
+ oformat = @options[:output_format]
45
+
46
+ if !LogSense::OptionsChecker::compatible?(iformat, oformat)
47
+ warn "Error: don't know how to make #{iformat} into #{oformat}."
48
+ warn "Possible transformation chains:"
49
+ warn LogSense::OptionsChecker.chains_to_s
23
50
  exit 1
24
51
  end
25
- @input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
26
52
 
27
53
  #
28
- # Parse Log and Track Statistics
54
+ # Do the work
29
55
  #
30
56
 
31
57
  @started_at = Time.now
32
58
 
33
- case @options[:input_format]
34
- when 'apache'
35
- parser_klass = LogSense::ApacheLogParser
36
- cruncher_klass = LogSense::ApacheDataCruncher
37
- when 'rails'
38
- parser_klass = LogSense::RailsLogParser
39
- cruncher_klass = LogSense::RailsDataCruncher
59
+ if @input_filenames.size > 0 &&
60
+ File.extname(@input_filenames.first) == ".sqlite3"
61
+ warn "Reading SQLite3 DB ..." if @options[:verbose]
62
+ @db = SQLite3::Database.open @input_filenames.first
40
63
  else
41
- $stderr.puts "Error: input format #{@options[:input_format]} not understood."
42
- exit 1
64
+ warn "Parsing ..." if @options[:verbose]
65
+ @input_files = if @input_filenames.empty?
66
+ [$stdin]
67
+ else
68
+ @input_filenames.map { |fname| File.open(fname, "r") }
69
+ end
70
+ class_name = "LogSense::#{@options[:input_format].capitalize}LogParser"
71
+ parser_class = Object.const_get class_name
72
+ parser = parser_class.new
73
+ @db = parser.parse @input_files
43
74
  end
44
75
 
45
- $stderr.puts "Parsing input files..." if @options[:verbose]
46
- @db = parser_klass.parse @input_files
76
+ if @options[:output_format] == "sqlite3"
77
+ warn "Saving SQLite3 DB ..." if @options[:verbose]
47
78
 
48
- if @options[:output_format] == 'sqlite'
49
- $stderr.puts "Saving to SQLite3..." if @options[:verbose]
50
- ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
51
- b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
79
+ ddb = SQLite3::Database.new(@output_filename || "db.sqlite3")
80
+ b = SQLite3::Backup.new(ddb, "main", @db, "main")
52
81
  b.step(-1) #=> DONE
53
82
  b.finish
83
+
84
+ exit 0
85
+ elsif @options[:output_format] == "ufw"
86
+ pattern = @options[:pattern] || "php"
87
+
88
+ if @options[:input_format] == "rails"
89
+ query = "select distinct event.ip,event.url
90
+ from error join event
91
+ where event.log_id = error.log_id and
92
+ event.url like '%#{pattern}%'"
93
+ else
94
+ query = "select distinct ip,path from logline
95
+ where path like '%#{pattern}%'"
96
+ end
97
+
98
+ ips = @db.execute query
99
+ ips_and_urls = ips.group_by { |x| x[0] }.transform_values { |x|
100
+ x.map { |y| y[1..-1] }.flatten
101
+ }
102
+ ips_and_urls.each do |ip, urls|
103
+ puts "# #{urls[0..10].uniq.join(' ')}"
104
+ puts "ufw deny from #{ip}"
105
+ puts
106
+ end
107
+
108
+ exit 0
54
109
  else
55
- $stderr.puts "Aggregating data..." if @options[:verbose]
56
- @data = cruncher_klass.crunch @db, @options
110
+ warn "Aggregating data ..." if @options[:verbose]
111
+ class_name = "LogSense::#{@options[:input_format].capitalize}Aggregator"
112
+ aggr_class = Object.const_get class_name
113
+ aggr = aggr_class.new(@db, @options)
114
+ @data = aggr.aggregate
57
115
 
58
- $stderr.puts "Geolocating..." if @options[:verbose]
59
- @data = LogSense::IpLocator.geolocate @data
116
+ if @options[:geolocation]
117
+ warn "Geolocating ..." if @options[:verbose]
118
+ @data = LogSense::IpLocator.geolocate @data
60
119
 
61
- $stderr.puts "Grouping by country..." if @options[:verbose]
62
- country_col = @data[:ips][0].size - 1
63
- @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
120
+ warn "Grouping IPs by country ..." if @options[:verbose]
121
+ country_col = @data[:ips][0].size - 1
122
+ @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
123
+ else
124
+ @data[:countries] = {}
125
+ end
64
126
 
65
127
  @ended_at = Time.now
66
128
  @duration = @ended_at - @started_at
67
129
 
68
130
  @data = @data.merge({
69
131
  command: @command_line,
70
- filenames: ARGV,
132
+ filenames: @input_filenames,
71
133
  log_files: @input_files,
72
134
  started_at: @started_at,
73
135
  ended_at: @ended_at,
74
136
  duration: @duration,
75
137
  width: @options[:width]
76
138
  })
77
- #
78
- # Emit Output
79
- #
80
- $stderr.puts "Emitting..." if @options[:verbose]
81
- puts LogSense::Emitter.emit @data, @options
139
+
140
+ if @options[:verbose]
141
+ warn "I have the following keys in data: "
142
+ warn @data.keys.sort.map { |key| "#{key}: #{@data[key].class}" }.join("\n")
143
+ end
144
+
145
+ warn "Shaping data for output ..." if @options[:verbose]
146
+ class_name = "LogSense::#{@options[:input_format].capitalize}ReportShaper"
147
+ shaper_class = Object.const_get class_name
148
+ shaper = shaper_class.new
149
+ @reports = shaper.shape @data
150
+
151
+ warn "Emitting..." if @options[:verbose]
152
+ puts LogSense::Emitter.emit @reports, @data, @options
82
153
  end
Binary file