log_sense 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +34 -0
- data/Gemfile.lock +4 -4
- data/README.org +25 -10
- data/Rakefile +17 -3
- data/exe/log_sense +30 -15
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache_data_cruncher.rb +19 -21
- data/lib/log_sense/apache_log_line_parser.rb +10 -10
- data/lib/log_sense/apache_log_parser.rb +44 -36
- data/lib/log_sense/emitter.rb +518 -25
- data/lib/log_sense/ip_locator.rb +26 -19
- data/lib/log_sense/options_parser.rb +37 -27
- data/lib/log_sense/rails_data_cruncher.rb +7 -3
- data/lib/log_sense/rails_log_parser.rb +108 -100
- data/lib/log_sense/templates/_command_invocation.html.erb +2 -2
- data/lib/log_sense/templates/_command_invocation.txt.erb +5 -3
- data/lib/log_sense/templates/_navigation.html.erb +22 -0
- data/lib/log_sense/templates/_output_table.html.erb +1 -7
- data/lib/log_sense/templates/_output_table.txt.erb +14 -0
- data/lib/log_sense/templates/_performance.html.erb +1 -1
- data/lib/log_sense/templates/_performance.txt.erb +8 -5
- data/lib/log_sense/templates/_report_data.html.erb +2 -3
- data/lib/log_sense/templates/_stylesheet.css +144 -0
- data/lib/log_sense/templates/_summary.html.erb +2 -2
- data/lib/log_sense/templates/_summary.txt.erb +11 -8
- data/lib/log_sense/templates/_warning.txt.erb +1 -0
- data/lib/log_sense/templates/apache.html.erb +51 -527
- data/lib/log_sense/templates/apache.txt.erb +22 -0
- data/lib/log_sense/templates/rails.html.erb +56 -353
- data/lib/log_sense/templates/rails.txt.erb +10 -60
- data/lib/log_sense/version.rb +1 -1
- metadata +7 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1f20a0d2041df1f2414bd0015fff27764c28a89dfd2d45fdb275141638bc5784
|
|
4
|
+
data.tar.gz: 14426b7383fe9b0077c2852f177545384a369f35489aa5d95372c20cbe19732d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9defcff35a5b3802d7d1a7db596a30aa01e28d3b9d784619de5f61713a587e427ccce76fcfcd478e946a0822af8d0822b23bad6392c2a71690ccd07250d5cfb7
|
|
7
|
+
data.tar.gz: 8b9143feb4f7508de9b7f60eddacafc4713c064f8bda21ed9ae92de364aa5e94fed36ff6d12f2b2aa108337d92c25868bce21d172397f48ff3c41ed93dc9a2b5
|
data/CHANGELOG.org
CHANGED
|
@@ -2,6 +2,40 @@
|
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
|
3
3
|
#+STARTUP: showall
|
|
4
4
|
|
|
5
|
+
* 1.5.1
|
|
6
|
+
|
|
7
|
+
- [User] Option --input-files allows specifying input files
|
|
8
|
+
in addition to passing filenames to the command line
|
|
9
|
+
- [User] Minor changes to the layout of HTML reports
|
|
10
|
+
- [User] Add version number in reports
|
|
11
|
+
- [Fixed] Duplicated entries in navigation
|
|
12
|
+
- [Code] Updated and added minitest(s)
|
|
13
|
+
|
|
14
|
+
* 1.5.0
|
|
15
|
+
|
|
16
|
+
- [User] Present Unique Visits / day as integer
|
|
17
|
+
- [User] Added Country and Streaks report for rails
|
|
18
|
+
- [User] Changed Streak report in Apache
|
|
19
|
+
- [Gem] Updated DBIP
|
|
20
|
+
- [Gem] Updated Bundle
|
|
21
|
+
- [Code] Refactored all reports, so that they are specified
|
|
22
|
+
in the same way
|
|
23
|
+
- [Code] Refactor warning message in textual reports
|
|
24
|
+
- [Code] Build HTML menu for report specification
|
|
25
|
+
- [Code] Various refactoring passes on the code
|
|
26
|
+
|
|
27
|
+
* 1.4.1
|
|
28
|
+
|
|
29
|
+
- [User] New textual report for Apache
|
|
30
|
+
- [User] New option -w sets maximum width of URL, Path, and
|
|
31
|
+
Description columns in textual reports
|
|
32
|
+
- [User] Removed option -i, since input filenames are now taken
|
|
33
|
+
as direct arguments
|
|
34
|
+
- [User] Allow multiple files in input
|
|
35
|
+
- [Fixed] Complain if input format is not supported
|
|
36
|
+
- [Code] Refactoring of reports to manage better output to
|
|
37
|
+
multiple formats
|
|
38
|
+
|
|
5
39
|
* 1.4.0
|
|
6
40
|
|
|
7
41
|
- [User] The Apache Log report now organizes page requests in four
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
log_sense (1.
|
|
4
|
+
log_sense (1.4.2)
|
|
5
5
|
browser
|
|
6
6
|
ipaddr
|
|
7
7
|
iso_country_codes
|
|
@@ -13,9 +13,9 @@ GEM
|
|
|
13
13
|
specs:
|
|
14
14
|
browser (5.3.1)
|
|
15
15
|
byebug (11.1.3)
|
|
16
|
-
ipaddr (1.2.
|
|
16
|
+
ipaddr (1.2.4)
|
|
17
17
|
iso_country_codes (0.7.8)
|
|
18
|
-
minitest (5.
|
|
18
|
+
minitest (5.15.0)
|
|
19
19
|
rake (12.3.3)
|
|
20
20
|
sqlite3 (1.4.2)
|
|
21
21
|
terminal-table (3.0.2)
|
|
@@ -32,4 +32,4 @@ DEPENDENCIES
|
|
|
32
32
|
rake (~> 12.0)
|
|
33
33
|
|
|
34
34
|
BUNDLED WITH
|
|
35
|
-
2.
|
|
35
|
+
2.3.3
|
data/README.org
CHANGED
|
@@ -19,8 +19,6 @@ LogSense reports the following data:
|
|
|
19
19
|
- OS, browsers, and devices
|
|
20
20
|
- IP Country location, thanks to the DB-IP lite country DB
|
|
21
21
|
- Streaks: resources accessed by a given IP over time
|
|
22
|
-
- Potential attacks: access to resources which are not meant to be
|
|
23
|
-
served by a web server serving static websites
|
|
24
22
|
- Performance of Rails requests
|
|
25
23
|
|
|
26
24
|
Filters from the command line allow analyzing specific periods and
|
|
@@ -33,6 +31,18 @@ And, of course, the compulsory screenshot:
|
|
|
33
31
|
#+ATTR_HTML: :width 80%
|
|
34
32
|
[[file:./apache-screenshot.png]]
|
|
35
33
|
|
|
34
|
+
|
|
35
|
+
* An important word of warning
|
|
36
|
+
|
|
37
|
+
[[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with unvalidated
|
|
38
|
+
user input to forge log entries or inject malicious content into the logs.
|
|
39
|
+
|
|
40
|
+
log_sense sanitizes entries of HTML reports, to try and protect from log
|
|
41
|
+
poisoning. *Log entries and URLs in SQLite3, however, are not sanitized*:
|
|
42
|
+
they are stored as read from the log. This is not, in general, an issue,
|
|
43
|
+
unless you use the data from SQLite in environments in which URLs can be
|
|
44
|
+
opened or code executed.
|
|
45
|
+
|
|
36
46
|
* Motivation
|
|
37
47
|
|
|
38
48
|
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
|
|
@@ -54,6 +64,7 @@ generated files are then made available on a private area on the web.
|
|
|
54
64
|
gem install log_sense
|
|
55
65
|
#+end_src
|
|
56
66
|
|
|
67
|
+
|
|
57
68
|
* Usage
|
|
58
69
|
|
|
59
70
|
#+begin_src bash :results raw output :wrap example
|
|
@@ -62,21 +73,23 @@ generated files are then made available on a private area on the web.
|
|
|
62
73
|
|
|
63
74
|
#+RESULTS:
|
|
64
75
|
#+begin_example
|
|
65
|
-
Usage: log_sense [options] [logfile]
|
|
76
|
+
Usage: log_sense [options] [logfile ...]
|
|
66
77
|
--title=TITLE Title to use in the report
|
|
67
78
|
-f, --input-format=FORMAT Input format (either rails or apache)
|
|
68
|
-
-i, --input-file
|
|
79
|
+
-i, --input-files=file,file, Input files (can also be passed directly)
|
|
69
80
|
-t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
|
|
70
81
|
-o, --output-file=OUTPUT_FILE Output file
|
|
71
82
|
-b, --begin=DATE Consider entries after or on DATE
|
|
72
83
|
-e, --end=DATE Consider entries before or on DATE
|
|
73
|
-
-l, --limit=N
|
|
84
|
+
-l, --limit=N Limit to the N most requested resources (defaults to 900)
|
|
85
|
+
-w, --width=WIDTH Maximum width of URL and description columns in text reports
|
|
74
86
|
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
|
75
87
|
-n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
|
|
88
|
+
--verbose Inform about progress (prints to STDERR)
|
|
76
89
|
-v, --version Prints version information
|
|
77
90
|
-h, --help Prints this help
|
|
78
91
|
|
|
79
|
-
This is version 1.
|
|
92
|
+
This is version 1.5.1
|
|
80
93
|
|
|
81
94
|
Output formats
|
|
82
95
|
rails parsing can produce the following outputs:
|
|
@@ -85,6 +98,7 @@ generated files are then made available on a private area on the web.
|
|
|
85
98
|
- html
|
|
86
99
|
apache parsing can produce the following outputs:
|
|
87
100
|
- sqlite
|
|
101
|
+
- txt
|
|
88
102
|
- html
|
|
89
103
|
#+end_example
|
|
90
104
|
|
|
@@ -95,6 +109,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt
|
|
|
95
109
|
log_sense -f rails -i production.log -t html -o performance.txt
|
|
96
110
|
#+end_example
|
|
97
111
|
|
|
112
|
+
|
|
98
113
|
* Change Log
|
|
99
114
|
|
|
100
115
|
See the [[file:CHANGELOG.org][CHANGELOG]] file.
|
|
@@ -109,8 +124,8 @@ Concerning the outputs:
|
|
|
109
124
|
- HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][Data Tables]], and [[https://vega.github.io/vega-lite/][Vega-Lite]], which
|
|
110
125
|
are all downloaded from a CDN
|
|
111
126
|
- The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
|
|
112
|
-
processed to any format [[https://orgmode.org/][Org Mode]] can be exported to
|
|
113
|
-
and PDF
|
|
127
|
+
processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
|
|
128
|
+
and PDF, with the word of warning in the section above.
|
|
114
129
|
|
|
115
130
|
* Author and Contributors
|
|
116
131
|
|
|
@@ -118,8 +133,8 @@ Concerning the outputs:
|
|
|
118
133
|
|
|
119
134
|
* Known Bugs
|
|
120
135
|
|
|
121
|
-
No known bugs; an unknown number of unknown bugs.
|
|
122
|
-
|
|
136
|
+
No known bugs; an unknown number of unknown bugs. (See the open issues for
|
|
137
|
+
the known bugs.)
|
|
123
138
|
|
|
124
139
|
* License
|
|
125
140
|
|
data/Rakefile
CHANGED
|
@@ -9,7 +9,21 @@ end
|
|
|
9
9
|
require_relative './lib/log_sense/ip_locator.rb'
|
|
10
10
|
|
|
11
11
|
desc "Convert Geolocation DB to sqlite"
|
|
12
|
-
task :dbip_to_sqlite3, [:
|
|
13
|
-
filename = args[:
|
|
14
|
-
|
|
12
|
+
task :dbip_to_sqlite3, [:year_month] do |tasks, args|
|
|
13
|
+
filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
|
|
14
|
+
|
|
15
|
+
if !File.exist? filename
|
|
16
|
+
puts "Error. Could not find: #{filename}"
|
|
17
|
+
puts
|
|
18
|
+
puts 'I see the following files:'
|
|
19
|
+
puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
|
|
20
|
+
puts ''
|
|
21
|
+
puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
|
|
22
|
+
puts '2. Save downloaded file to ip_locations/'
|
|
23
|
+
puts '3. Relaunch with YYYY-MM'
|
|
24
|
+
|
|
25
|
+
exit
|
|
26
|
+
else
|
|
27
|
+
LogSense::IpLocator::dbip_to_sqlite filename
|
|
28
|
+
end
|
|
15
29
|
end
|
data/exe/log_sense
CHANGED
|
@@ -7,21 +7,22 @@ require 'log_sense.rb'
|
|
|
7
7
|
#
|
|
8
8
|
|
|
9
9
|
# this better be here... OptionsParser consumes ARGV
|
|
10
|
-
@command_line = ARGV.join(
|
|
11
|
-
|
|
10
|
+
@command_line = ARGV.join(' ')
|
|
12
11
|
@options = LogSense::OptionsParser.parse ARGV
|
|
13
|
-
@input_file = @options[:input_file] || ARGV[0]
|
|
14
12
|
@output_file = @options[:output_file]
|
|
15
13
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
14
|
+
#
|
|
15
|
+
# Input files can be gotten from an option and from what remains in
|
|
16
|
+
# ARGV
|
|
17
|
+
#
|
|
18
|
+
@input_filenames = @options[:input_filenames] + ARGV
|
|
19
|
+
@non_existing = @input_filenames.reject { |x| File.exist?(x) }
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
puts "Error: input file '#{@
|
|
21
|
+
unless @non_existing.empty?
|
|
22
|
+
$stderr.puts "Error: input file(s) '#{@non_existing.join(', ')}' do not exist"
|
|
23
23
|
exit 1
|
|
24
24
|
end
|
|
25
|
+
@input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
|
|
25
26
|
|
|
26
27
|
#
|
|
27
28
|
# Parse Log and Track Statistics
|
|
@@ -36,32 +37,46 @@ when 'apache'
|
|
|
36
37
|
when 'rails'
|
|
37
38
|
parser_klass = LogSense::RailsLogParser
|
|
38
39
|
cruncher_klass = LogSense::RailsDataCruncher
|
|
40
|
+
else
|
|
41
|
+
$stderr.puts "Error: input format #{@options[:input_format]} not understood."
|
|
42
|
+
exit 1
|
|
39
43
|
end
|
|
40
44
|
|
|
41
|
-
|
|
45
|
+
$stderr.puts "Parsing input files..." if @options[:verbose]
|
|
46
|
+
@db = parser_klass.parse @input_files
|
|
42
47
|
|
|
43
|
-
if @options[:output_format]
|
|
44
|
-
|
|
48
|
+
if @options[:output_format] == 'sqlite'
|
|
49
|
+
$stderr.puts "Saving to SQLite3..." if @options[:verbose]
|
|
50
|
+
ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
|
|
45
51
|
b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
|
|
46
52
|
b.step(-1) #=> DONE
|
|
47
53
|
b.finish
|
|
48
54
|
else
|
|
55
|
+
$stderr.puts "Aggregating data..." if @options[:verbose]
|
|
49
56
|
@data = cruncher_klass.crunch @db, @options
|
|
57
|
+
|
|
58
|
+
$stderr.puts "Geolocating..." if @options[:verbose]
|
|
50
59
|
@data = LogSense::IpLocator.geolocate @data
|
|
51
60
|
|
|
61
|
+
$stderr.puts "Grouping by country..." if @options[:verbose]
|
|
62
|
+
country_col = @data[:ips][0].size - 1
|
|
63
|
+
@data[:countries] = @data[:ips].group_by { |x| x[country_col] }
|
|
64
|
+
|
|
52
65
|
@ended_at = Time.now
|
|
53
66
|
@duration = @ended_at - @started_at
|
|
54
67
|
|
|
55
68
|
@data = @data.merge({
|
|
56
69
|
command: @command_line,
|
|
57
|
-
|
|
70
|
+
filenames: ARGV,
|
|
71
|
+
log_files: @input_files,
|
|
58
72
|
started_at: @started_at,
|
|
59
73
|
ended_at: @ended_at,
|
|
60
|
-
duration: @duration
|
|
74
|
+
duration: @duration,
|
|
75
|
+
width: @options[:width]
|
|
61
76
|
})
|
|
62
|
-
|
|
63
77
|
#
|
|
64
78
|
# Emit Output
|
|
65
79
|
#
|
|
80
|
+
$stderr.puts "Emitting..." if @options[:verbose]
|
|
66
81
|
puts LogSense::Emitter.emit @data, @options
|
|
67
82
|
end
|
|
Binary file
|
|
@@ -15,17 +15,17 @@ module LogSense
|
|
|
15
15
|
@last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
|
|
16
16
|
|
|
17
17
|
@total_days = 0
|
|
18
|
-
if @first_day
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
@total_days = (@last_day - @first_day).to_i if @first_day && @last_day
|
|
19
|
+
|
|
20
|
+
@source_files = db.execute 'SELECT distinct(source_file) from LogLine'
|
|
21
21
|
|
|
22
|
-
@log_size = db.execute
|
|
22
|
+
@log_size = db.execute 'SELECT count(datetime) from LogLine'
|
|
23
23
|
@log_size = @log_size[0][0]
|
|
24
24
|
|
|
25
25
|
@selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
|
|
26
26
|
@selfpolls_size = @selfpolls_size[0][0]
|
|
27
27
|
|
|
28
|
-
@crawlers_size = db.execute
|
|
28
|
+
@crawlers_size = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
|
|
29
29
|
@crawlers_size = @crawlers_size[0][0]
|
|
30
30
|
|
|
31
31
|
@first_day_requested = options[:from_date]
|
|
@@ -35,7 +35,7 @@ module LogSense
|
|
|
35
35
|
@last_day_in_analysis = date_intersect options[:to_date], @last_day, :min
|
|
36
36
|
|
|
37
37
|
@total_days_in_analysis = 0
|
|
38
|
-
if @first_day_in_analysis
|
|
38
|
+
if @first_day_in_analysis && @last_day_in_analysis
|
|
39
39
|
@total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
|
|
40
40
|
end
|
|
41
41
|
|
|
@@ -45,24 +45,24 @@ module LogSense
|
|
|
45
45
|
filter = [
|
|
46
46
|
(options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
|
|
47
47
|
(options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
|
|
48
|
-
(options[:only_crawlers] ?
|
|
49
|
-
(options[:ignore_crawlers] ?
|
|
48
|
+
(options[:only_crawlers] ? 'bot == 1' : nil),
|
|
49
|
+
(options[:ignore_crawlers] ? 'bot == 0' : nil),
|
|
50
50
|
(options[:no_selfpolls] ? "ip != '::1'" : nil),
|
|
51
|
-
|
|
51
|
+
'true'
|
|
52
52
|
].compact.join " and "
|
|
53
53
|
|
|
54
54
|
mega = 1024 * 1024
|
|
55
55
|
giga = mega * 1024
|
|
56
56
|
tera = giga * 1024
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
# in alternative to sum(size)
|
|
59
59
|
human_readable_size = <<-EOS
|
|
60
|
-
CASE
|
|
60
|
+
CASE
|
|
61
61
|
WHEN sum(size) < 1024 THEN sum(size) || ' B'
|
|
62
62
|
WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
|
|
63
63
|
WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
|
|
64
64
|
WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
|
|
65
|
-
WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
|
|
65
|
+
WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
|
|
66
66
|
END AS size
|
|
67
67
|
EOS
|
|
68
68
|
|
|
@@ -117,20 +117,19 @@ module LogSense
|
|
|
117
117
|
|
|
118
118
|
@ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
|
|
119
119
|
|
|
120
|
-
@streaks = db.execute
|
|
120
|
+
@streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
|
|
121
121
|
data = {}
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
var_as_symbol = variable.to_s[1
|
|
125
|
-
data[var_as_symbol] =
|
|
123
|
+
instance_variables.each do |variable|
|
|
124
|
+
var_as_symbol = variable.to_s[1..].to_sym
|
|
125
|
+
data[var_as_symbol] = instance_variable_get(variable)
|
|
126
126
|
end
|
|
127
|
+
|
|
127
128
|
data
|
|
128
129
|
end
|
|
129
130
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def self.date_intersect date1, date2, method
|
|
133
|
-
if date1 and date2
|
|
131
|
+
def self.date_intersect(date1, date2, method)
|
|
132
|
+
if date1 && date2
|
|
134
133
|
[date1, date2].send(method)
|
|
135
134
|
elsif date1
|
|
136
135
|
date1
|
|
@@ -140,4 +139,3 @@ module LogSense
|
|
|
140
139
|
end
|
|
141
140
|
end
|
|
142
141
|
end
|
|
143
|
-
|
|
@@ -31,20 +31,20 @@ module LogSense
|
|
|
31
31
|
|
|
32
32
|
TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
|
|
33
33
|
|
|
34
|
-
HTTP_METHODS
|
|
35
|
-
WEBDAV_METHODS
|
|
36
|
-
OTHER_METHODS
|
|
37
|
-
METHOD
|
|
38
|
-
PROTOCOL
|
|
39
|
-
URL
|
|
40
|
-
REFERER
|
|
41
|
-
RETURN_CODE
|
|
42
|
-
SIZE
|
|
34
|
+
HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
|
|
35
|
+
WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
|
|
36
|
+
OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
|
|
37
|
+
METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
|
|
38
|
+
PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
|
|
39
|
+
URL = /(?<url>[^ ]+)/
|
|
40
|
+
REFERER = /(?<referer>[^"]*)/
|
|
41
|
+
RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
|
|
42
|
+
SIZE = /(?<size>[0-9]+|-)/
|
|
43
43
|
USER_AGENT = /(?<user_agent>[^"]*)/
|
|
44
44
|
|
|
45
45
|
attr_reader :format
|
|
46
46
|
|
|
47
|
-
def initialize
|
|
47
|
+
def initialize
|
|
48
48
|
@format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
|
|
49
49
|
end
|
|
50
50
|
|
|
@@ -7,10 +7,9 @@ module LogSense
|
|
|
7
7
|
# parse an Apache log file and return a SQLite3 DB
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
-
def self.parse
|
|
11
|
-
|
|
10
|
+
def self.parse(streams, options = {})
|
|
11
|
+
db = SQLite3::Database.new ':memory:'
|
|
12
12
|
|
|
13
|
-
db = SQLite3::Database.new ":memory:"
|
|
14
13
|
db.execute "CREATE TABLE IF NOT EXISTS LogLine(
|
|
15
14
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
16
15
|
datetime TEXT,
|
|
@@ -28,15 +27,18 @@ module LogSense
|
|
|
28
27
|
browser TEXT,
|
|
29
28
|
browser_version TEXT,
|
|
30
29
|
platform TEXT,
|
|
31
|
-
platform_version TEXT
|
|
30
|
+
platform_version TEXT,
|
|
31
|
+
source_file TEXT,
|
|
32
|
+
line_number INTEGER
|
|
33
|
+
)"
|
|
32
34
|
|
|
33
|
-
ins = db.prepare(
|
|
34
|
-
datetime,
|
|
35
|
+
ins = db.prepare("insert into LogLine (
|
|
36
|
+
datetime,
|
|
35
37
|
ip,
|
|
36
38
|
user,
|
|
37
39
|
unique_visitor,
|
|
38
40
|
method,
|
|
39
|
-
path,
|
|
41
|
+
path,
|
|
40
42
|
extension,
|
|
41
43
|
status,
|
|
42
44
|
size,
|
|
@@ -46,44 +48,50 @@ module LogSense
|
|
|
46
48
|
browser,
|
|
47
49
|
browser_version,
|
|
48
50
|
platform,
|
|
49
|
-
platform_version
|
|
50
|
-
|
|
51
|
+
platform_version,
|
|
52
|
+
source_file,
|
|
53
|
+
line_number
|
|
54
|
+
)
|
|
55
|
+
values (#{Array.new(18, '?').join(', ')})")
|
|
51
56
|
|
|
52
57
|
parser = ApacheLogLineParser.new
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
58
|
+
|
|
59
|
+
streams.each do |stream|
|
|
60
|
+
stream.readlines.each_with_index do |line, line_number|
|
|
61
|
+
begin
|
|
62
|
+
hash = parser.parse line
|
|
63
|
+
ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
|
|
64
|
+
ins.execute(
|
|
65
|
+
DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
|
|
66
|
+
hash[:ip],
|
|
67
|
+
hash[:userid],
|
|
68
|
+
unique_visitor_id(hash),
|
|
69
|
+
hash[:method],
|
|
70
|
+
hash[:url],
|
|
71
|
+
(hash[:url] ? File.extname(hash[:url]) : ''),
|
|
72
|
+
hash[:status],
|
|
73
|
+
hash[:size].to_i,
|
|
74
|
+
hash[:referer],
|
|
75
|
+
hash[:user_agent],
|
|
76
|
+
ua.bot? ? 1 : 0,
|
|
77
|
+
(ua.name || ''),
|
|
78
|
+
(ua.version || ''),
|
|
79
|
+
(ua.platform.name || ''),
|
|
80
|
+
(ua.platform.version || ''),
|
|
81
|
+
stream == $stdin ? "stdin" : stream.path,
|
|
82
|
+
line_number
|
|
83
|
+
)
|
|
84
|
+
rescue StandardError => e
|
|
85
|
+
$stderr.puts e.message
|
|
86
|
+
end
|
|
78
87
|
end
|
|
79
88
|
end
|
|
80
|
-
|
|
89
|
+
|
|
81
90
|
db
|
|
82
91
|
end
|
|
83
92
|
|
|
84
93
|
def self.unique_visitor_id hash
|
|
85
94
|
"#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
|
|
86
95
|
end
|
|
87
|
-
|
|
88
96
|
end
|
|
89
97
|
end
|