log_sense 1.4.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.org +34 -0
- data/Gemfile.lock +4 -4
- data/README.org +25 -10
- data/Rakefile +17 -3
- data/exe/log_sense +30 -15
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache_data_cruncher.rb +19 -21
- data/lib/log_sense/apache_log_line_parser.rb +10 -10
- data/lib/log_sense/apache_log_parser.rb +44 -36
- data/lib/log_sense/emitter.rb +518 -25
- data/lib/log_sense/ip_locator.rb +26 -19
- data/lib/log_sense/options_parser.rb +37 -27
- data/lib/log_sense/rails_data_cruncher.rb +7 -3
- data/lib/log_sense/rails_log_parser.rb +108 -100
- data/lib/log_sense/templates/_command_invocation.html.erb +2 -2
- data/lib/log_sense/templates/_command_invocation.txt.erb +5 -3
- data/lib/log_sense/templates/_navigation.html.erb +22 -0
- data/lib/log_sense/templates/_output_table.html.erb +1 -7
- data/lib/log_sense/templates/_output_table.txt.erb +14 -0
- data/lib/log_sense/templates/_performance.html.erb +1 -1
- data/lib/log_sense/templates/_performance.txt.erb +8 -5
- data/lib/log_sense/templates/_report_data.html.erb +2 -3
- data/lib/log_sense/templates/_stylesheet.css +144 -0
- data/lib/log_sense/templates/_summary.html.erb +2 -2
- data/lib/log_sense/templates/_summary.txt.erb +11 -8
- data/lib/log_sense/templates/_warning.txt.erb +1 -0
- data/lib/log_sense/templates/apache.html.erb +51 -527
- data/lib/log_sense/templates/apache.txt.erb +22 -0
- data/lib/log_sense/templates/rails.html.erb +56 -353
- data/lib/log_sense/templates/rails.txt.erb +10 -60
- data/lib/log_sense/version.rb +1 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f20a0d2041df1f2414bd0015fff27764c28a89dfd2d45fdb275141638bc5784
|
4
|
+
data.tar.gz: 14426b7383fe9b0077c2852f177545384a369f35489aa5d95372c20cbe19732d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9defcff35a5b3802d7d1a7db596a30aa01e28d3b9d784619de5f61713a587e427ccce76fcfcd478e946a0822af8d0822b23bad6392c2a71690ccd07250d5cfb7
|
7
|
+
data.tar.gz: 8b9143feb4f7508de9b7f60eddacafc4713c064f8bda21ed9ae92de364aa5e94fed36ff6d12f2b2aa108337d92c25868bce21d172397f48ff3c41ed93dc9a2b5
|
data/CHANGELOG.org
CHANGED
@@ -2,6 +2,40 @@
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
3
3
|
#+STARTUP: showall
|
4
4
|
|
5
|
+
* 1.5.1
|
6
|
+
|
7
|
+
- [User] Option --input-files allows to specify input files
|
8
|
+
in addition to passing filenames to the command line
|
9
|
+
- [User] Minor changes to the layout of HTML reports
|
10
|
+
- [User] Add version number in reports
|
11
|
+
- [Fixed] Duplicated entries in navigation
|
12
|
+
- [Code] Updated and added minitest(s)
|
13
|
+
|
14
|
+
* 1.5.0
|
15
|
+
|
16
|
+
- [User] Present Unique Visits / day as integer
|
17
|
+
- [User] Added Country and Streaks report for rails
|
18
|
+
- [User] Changed Streak report in Apache
|
19
|
+
- [Gem] Updated DBIP
|
20
|
+
- [Gem] Updated Bundle
|
21
|
+
- [Code] Refactored all reports, so that they are specified
|
22
|
+
in the same way
|
23
|
+
- [Code] Refactor warning message in textual reports
|
24
|
+
- [Code] Build HTML menu for report specification
|
25
|
+
- [Code] Various refactoring passes on the code
|
26
|
+
|
27
|
+
* 1.4.1
|
28
|
+
|
29
|
+
- [User] New textual report for Apache
|
30
|
+
- [User] New option -w sets maximum width of URL, Path, and
|
31
|
+
Description columns in textual reports
|
32
|
+
- [User] Removed option -i, since input filenames are now taken
|
33
|
+
as direct arguments
|
34
|
+
- [User] Allow multiple files in input
|
35
|
+
- [Fixed] Complain if input format is not supported
|
36
|
+
- [Code] Refactoring of reports to manage better output to
|
37
|
+
multiple formats
|
38
|
+
|
5
39
|
* 1.4.0
|
6
40
|
|
7
41
|
- [User] The Apache Log report now organizes page requests in four
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
log_sense (1.
|
4
|
+
log_sense (1.4.2)
|
5
5
|
browser
|
6
6
|
ipaddr
|
7
7
|
iso_country_codes
|
@@ -13,9 +13,9 @@ GEM
|
|
13
13
|
specs:
|
14
14
|
browser (5.3.1)
|
15
15
|
byebug (11.1.3)
|
16
|
-
ipaddr (1.2.
|
16
|
+
ipaddr (1.2.4)
|
17
17
|
iso_country_codes (0.7.8)
|
18
|
-
minitest (5.
|
18
|
+
minitest (5.15.0)
|
19
19
|
rake (12.3.3)
|
20
20
|
sqlite3 (1.4.2)
|
21
21
|
terminal-table (3.0.2)
|
@@ -32,4 +32,4 @@ DEPENDENCIES
|
|
32
32
|
rake (~> 12.0)
|
33
33
|
|
34
34
|
BUNDLED WITH
|
35
|
-
2.
|
35
|
+
2.3.3
|
data/README.org
CHANGED
@@ -19,8 +19,6 @@ LogSense reports the following data:
|
|
19
19
|
- OS, browsers, and devices
|
20
20
|
- IP Country location, thanks to the DB-IP lite country DB
|
21
21
|
- Streaks: resources accessed by a given IP over time
|
22
|
-
- Potential attacks: access to resources which are not meant to be
|
23
|
-
served by a web server serving static websites
|
24
22
|
- Performance of Rails requests
|
25
23
|
|
26
24
|
Filters from the command line allow to analyze specific periods and
|
@@ -33,6 +31,18 @@ And, of course, the compulsory screenshot:
|
|
33
31
|
#+ATTR_HTML: :width 80%
|
34
32
|
[[file:./apache-screenshot.png]]
|
35
33
|
|
34
|
+
|
35
|
+
* An important word of warning
|
36
|
+
|
37
|
+
[[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with unvalidated
|
38
|
+
user input to forge log entries or inject malicious content into the logs.
|
39
|
+
|
40
|
+
log_sense sanitizes entries of HTML reports, to try and protect from log
|
41
|
+
poisoning. *Log entries and URLs in SQLite3, however, are not sanitized*:
|
42
|
+
they are stored as read from the log. This is not, in general, an issue,
|
43
|
+
unless you use the data from SQLite in environments in which URLs can be
|
44
|
+
opened or code executed.
|
45
|
+
|
36
46
|
* Motivation
|
37
47
|
|
38
48
|
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
|
@@ -54,6 +64,7 @@ generated files are then made available on a private area on the web.
|
|
54
64
|
gem install log_sense
|
55
65
|
#+end_src
|
56
66
|
|
67
|
+
|
57
68
|
* Usage
|
58
69
|
|
59
70
|
#+begin_src bash :results raw output :wrap example
|
@@ -62,21 +73,23 @@ generated files are then made available on a private area on the web.
|
|
62
73
|
|
63
74
|
#+RESULTS:
|
64
75
|
#+begin_example
|
65
|
-
Usage: log_sense [options] [logfile]
|
76
|
+
Usage: log_sense [options] [logfile ...]
|
66
77
|
--title=TITLE Title to use in the report
|
67
78
|
-f, --input-format=FORMAT Input format (either rails or apache)
|
68
|
-
-i, --input-file
|
79
|
+
-i, --input-files=file,file, Input files (can also be passed directly)
|
69
80
|
-t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
|
70
81
|
-o, --output-file=OUTPUT_FILE Output file
|
71
82
|
-b, --begin=DATE Consider entries after or on DATE
|
72
83
|
-e, --end=DATE Consider entries before or on DATE
|
73
|
-
-l, --limit=N
|
84
|
+
-l, --limit=N Limit to the N most requested resources (defaults to 900)
|
85
|
+
-w, --width=WIDTH Maximum width of URL and description columns in text reports
|
74
86
|
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
75
87
|
-n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
|
88
|
+
--verbose Inform about progress (prints to STDERR)
|
76
89
|
-v, --version Prints version information
|
77
90
|
-h, --help Prints this help
|
78
91
|
|
79
|
-
This is version 1.
|
92
|
+
This is version 1.5.1
|
80
93
|
|
81
94
|
Output formats
|
82
95
|
rails parsing can produce the following outputs:
|
@@ -85,6 +98,7 @@ generated files are then made available on a private area on the web.
|
|
85
98
|
- html
|
86
99
|
apache parsing can produce the following outputs:
|
87
100
|
- sqlite
|
101
|
+
- txt
|
88
102
|
- html
|
89
103
|
#+end_example
|
90
104
|
|
@@ -95,6 +109,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt
|
|
95
109
|
log_sense -f rails -i production.log -t html -o performance.txt
|
96
110
|
#+end_example
|
97
111
|
|
112
|
+
|
98
113
|
* Change Log
|
99
114
|
|
100
115
|
See the [[file:CHANGELOG.org][CHANGELOG]] file.
|
@@ -109,8 +124,8 @@ Concerning the outputs:
|
|
109
124
|
- HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][Data Tables]], and [[https://vega.github.io/vega-lite/][Vega-Lite]], which
|
110
125
|
are all downloaded from a CDN
|
111
126
|
- The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
|
112
|
-
processed to any format [[https://orgmode.org/][Org Mode]] can be exported to
|
113
|
-
and PDF
|
127
|
+
processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
|
128
|
+
and PDF, with the word of warning in the section above.
|
114
129
|
|
115
130
|
* Author and Contributors
|
116
131
|
|
@@ -118,8 +133,8 @@ Concerning the outputs:
|
|
118
133
|
|
119
134
|
* Known Bugs
|
120
135
|
|
121
|
-
No known bugs; an unknown number of unknown bugs.
|
122
|
-
|
136
|
+
No known bugs; an unknown number of unknown bugs. (See the open issues for
|
137
|
+
the known bugs.)
|
123
138
|
|
124
139
|
* License
|
125
140
|
|
data/Rakefile
CHANGED
@@ -9,7 +9,21 @@ end
|
|
9
9
|
require_relative './lib/log_sense/ip_locator.rb'
|
10
10
|
|
11
11
|
desc "Convert Geolocation DB to sqlite"
|
12
|
-
task :dbip_to_sqlite3, [:
|
13
|
-
filename = args[:
|
14
|
-
|
12
|
+
task :dbip_to_sqlite3, [:year_month] do |tasks, args|
|
13
|
+
filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
|
14
|
+
|
15
|
+
if !File.exist? filename
|
16
|
+
puts "Error. Could not find: #{filename}"
|
17
|
+
puts
|
18
|
+
puts 'I see the following files:'
|
19
|
+
puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
|
20
|
+
puts ''
|
21
|
+
puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
|
22
|
+
puts '2. Save downloaded file to ip_locations/'
|
23
|
+
puts '3. Relaunch with YYYY-MM'
|
24
|
+
|
25
|
+
exit
|
26
|
+
else
|
27
|
+
LogSense::IpLocator::dbip_to_sqlite filename
|
28
|
+
end
|
15
29
|
end
|
data/exe/log_sense
CHANGED
@@ -7,21 +7,22 @@ require 'log_sense.rb'
|
|
7
7
|
#
|
8
8
|
|
9
9
|
# this better be here... OptionsParser consumes ARGV
|
10
|
-
@command_line = ARGV.join(
|
11
|
-
|
10
|
+
@command_line = ARGV.join(' ')
|
12
11
|
@options = LogSense::OptionsParser.parse ARGV
|
13
|
-
@input_file = @options[:input_file] || ARGV[0]
|
14
12
|
@output_file = @options[:output_file]
|
15
13
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
#
|
15
|
+
# Input files can be gotten from an option and from what remains in
|
16
|
+
# ARGV
|
17
|
+
#
|
18
|
+
@input_filenames = @options[:input_filenames] + ARGV
|
19
|
+
@non_existing = @input_filenames.reject { |x| File.exist?(x) }
|
20
20
|
|
21
|
-
|
22
|
-
puts "Error: input file '#{@
|
21
|
+
unless @non_existing.empty?
|
22
|
+
$stderr.puts "Error: input file(s) '#{@non_existing.join(', ')}' do not exist"
|
23
23
|
exit 1
|
24
24
|
end
|
25
|
+
@input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
|
25
26
|
|
26
27
|
#
|
27
28
|
# Parse Log and Track Statistics
|
@@ -36,32 +37,46 @@ when 'apache'
|
|
36
37
|
when 'rails'
|
37
38
|
parser_klass = LogSense::RailsLogParser
|
38
39
|
cruncher_klass = LogSense::RailsDataCruncher
|
40
|
+
else
|
41
|
+
$stderr.puts "Error: input format #{@options[:input_format]} not understood."
|
42
|
+
exit 1
|
39
43
|
end
|
40
44
|
|
41
|
-
|
45
|
+
$stderr.puts "Parsing input files..." if @options[:verbose]
|
46
|
+
@db = parser_klass.parse @input_files
|
42
47
|
|
43
|
-
if @options[:output_format]
|
44
|
-
|
48
|
+
if @options[:output_format] == 'sqlite'
|
49
|
+
$stderr.puts "Saving to SQLite3..." if @options[:verbose]
|
50
|
+
ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
|
45
51
|
b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
|
46
52
|
b.step(-1) #=> DONE
|
47
53
|
b.finish
|
48
54
|
else
|
55
|
+
$stderr.puts "Aggregating data..." if @options[:verbose]
|
49
56
|
@data = cruncher_klass.crunch @db, @options
|
57
|
+
|
58
|
+
$stderr.puts "Geolocating..." if @options[:verbose]
|
50
59
|
@data = LogSense::IpLocator.geolocate @data
|
51
60
|
|
61
|
+
$stderr.puts "Grouping by country..." if @options[:verbose]
|
62
|
+
country_col = @data[:ips][0].size - 1
|
63
|
+
@data[:countries] = @data[:ips].group_by { |x| x[country_col] }
|
64
|
+
|
52
65
|
@ended_at = Time.now
|
53
66
|
@duration = @ended_at - @started_at
|
54
67
|
|
55
68
|
@data = @data.merge({
|
56
69
|
command: @command_line,
|
57
|
-
|
70
|
+
filenames: ARGV,
|
71
|
+
log_files: @input_files,
|
58
72
|
started_at: @started_at,
|
59
73
|
ended_at: @ended_at,
|
60
|
-
duration: @duration
|
74
|
+
duration: @duration,
|
75
|
+
width: @options[:width]
|
61
76
|
})
|
62
|
-
|
63
77
|
#
|
64
78
|
# Emit Output
|
65
79
|
#
|
80
|
+
$stderr.puts "Emitting..." if @options[:verbose]
|
66
81
|
puts LogSense::Emitter.emit @data, @options
|
67
82
|
end
|
Binary file
|
@@ -15,17 +15,17 @@ module LogSense
|
|
15
15
|
@last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
|
16
16
|
|
17
17
|
@total_days = 0
|
18
|
-
if @first_day
|
19
|
-
|
20
|
-
|
18
|
+
@total_days = (@last_day - @first_day).to_i if @first_day && @last_day
|
19
|
+
|
20
|
+
@source_files = db.execute 'SELECT distinct(source_file) from LogLine'
|
21
21
|
|
22
|
-
@log_size = db.execute
|
22
|
+
@log_size = db.execute 'SELECT count(datetime) from LogLine'
|
23
23
|
@log_size = @log_size[0][0]
|
24
24
|
|
25
25
|
@selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
|
26
26
|
@selfpolls_size = @selfpolls_size[0][0]
|
27
27
|
|
28
|
-
@crawlers_size = db.execute
|
28
|
+
@crawlers_size = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
|
29
29
|
@crawlers_size = @crawlers_size[0][0]
|
30
30
|
|
31
31
|
@first_day_requested = options[:from_date]
|
@@ -35,7 +35,7 @@ module LogSense
|
|
35
35
|
@last_day_in_analysis = date_intersect options[:to_date], @last_day, :min
|
36
36
|
|
37
37
|
@total_days_in_analysis = 0
|
38
|
-
if @first_day_in_analysis
|
38
|
+
if @first_day_in_analysis && @last_day_in_analysis
|
39
39
|
@total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
|
40
40
|
end
|
41
41
|
|
@@ -45,24 +45,24 @@ module LogSense
|
|
45
45
|
filter = [
|
46
46
|
(options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
|
47
47
|
(options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
|
48
|
-
(options[:only_crawlers] ?
|
49
|
-
(options[:ignore_crawlers] ?
|
48
|
+
(options[:only_crawlers] ? 'bot == 1' : nil),
|
49
|
+
(options[:ignore_crawlers] ? 'bot == 0' : nil),
|
50
50
|
(options[:no_selfpolls] ? "ip != '::1'" : nil),
|
51
|
-
|
51
|
+
'true'
|
52
52
|
].compact.join " and "
|
53
53
|
|
54
54
|
mega = 1024 * 1024
|
55
55
|
giga = mega * 1024
|
56
56
|
tera = giga * 1024
|
57
|
-
|
57
|
+
|
58
58
|
# in alternative to sum(size)
|
59
59
|
human_readable_size = <<-EOS
|
60
|
-
CASE
|
60
|
+
CASE
|
61
61
|
WHEN sum(size) < 1024 THEN sum(size) || ' B'
|
62
62
|
WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
|
63
63
|
WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
|
64
64
|
WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
|
65
|
-
WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
|
65
|
+
WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
|
66
66
|
END AS size
|
67
67
|
EOS
|
68
68
|
|
@@ -117,20 +117,19 @@ module LogSense
|
|
117
117
|
|
118
118
|
@ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
|
119
119
|
|
120
|
-
@streaks = db.execute
|
120
|
+
@streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
|
121
121
|
data = {}
|
122
122
|
|
123
|
-
|
124
|
-
var_as_symbol = variable.to_s[1
|
125
|
-
data[var_as_symbol] =
|
123
|
+
instance_variables.each do |variable|
|
124
|
+
var_as_symbol = variable.to_s[1..].to_sym
|
125
|
+
data[var_as_symbol] = instance_variable_get(variable)
|
126
126
|
end
|
127
|
+
|
127
128
|
data
|
128
129
|
end
|
129
130
|
|
130
|
-
|
131
|
-
|
132
|
-
def self.date_intersect date1, date2, method
|
133
|
-
if date1 and date2
|
131
|
+
def self.date_intersect(date1, date2, method)
|
132
|
+
if date1 && date2
|
134
133
|
[date1, date2].send(method)
|
135
134
|
elsif date1
|
136
135
|
date1
|
@@ -140,4 +139,3 @@ module LogSense
|
|
140
139
|
end
|
141
140
|
end
|
142
141
|
end
|
143
|
-
|
@@ -31,20 +31,20 @@ module LogSense
|
|
31
31
|
|
32
32
|
TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
|
33
33
|
|
34
|
-
HTTP_METHODS
|
35
|
-
WEBDAV_METHODS
|
36
|
-
OTHER_METHODS
|
37
|
-
METHOD
|
38
|
-
PROTOCOL
|
39
|
-
URL
|
40
|
-
REFERER
|
41
|
-
RETURN_CODE
|
42
|
-
SIZE
|
34
|
+
HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
|
35
|
+
WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
|
36
|
+
OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
|
37
|
+
METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
|
38
|
+
PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
|
39
|
+
URL = /(?<url>[^ ]+)/
|
40
|
+
REFERER = /(?<referer>[^"]*)/
|
41
|
+
RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
|
42
|
+
SIZE = /(?<size>[0-9]+|-)/
|
43
43
|
USER_AGENT = /(?<user_agent>[^"]*)/
|
44
44
|
|
45
45
|
attr_reader :format
|
46
46
|
|
47
|
-
def initialize
|
47
|
+
def initialize
|
48
48
|
@format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
|
49
49
|
end
|
50
50
|
|
@@ -7,10 +7,9 @@ module LogSense
|
|
7
7
|
# parse an Apache log file and return a SQLite3 DB
|
8
8
|
#
|
9
9
|
|
10
|
-
def self.parse
|
11
|
-
|
10
|
+
def self.parse(streams, options = {})
|
11
|
+
db = SQLite3::Database.new ':memory:'
|
12
12
|
|
13
|
-
db = SQLite3::Database.new ":memory:"
|
14
13
|
db.execute "CREATE TABLE IF NOT EXISTS LogLine(
|
15
14
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
16
15
|
datetime TEXT,
|
@@ -28,15 +27,18 @@ module LogSense
|
|
28
27
|
browser TEXT,
|
29
28
|
browser_version TEXT,
|
30
29
|
platform TEXT,
|
31
|
-
platform_version TEXT
|
30
|
+
platform_version TEXT,
|
31
|
+
source_file TEXT,
|
32
|
+
line_number INTEGER
|
33
|
+
)"
|
32
34
|
|
33
|
-
ins = db.prepare(
|
34
|
-
datetime,
|
35
|
+
ins = db.prepare("insert into LogLine (
|
36
|
+
datetime,
|
35
37
|
ip,
|
36
38
|
user,
|
37
39
|
unique_visitor,
|
38
40
|
method,
|
39
|
-
path,
|
41
|
+
path,
|
40
42
|
extension,
|
41
43
|
status,
|
42
44
|
size,
|
@@ -46,44 +48,50 @@ module LogSense
|
|
46
48
|
browser,
|
47
49
|
browser_version,
|
48
50
|
platform,
|
49
|
-
platform_version
|
50
|
-
|
51
|
+
platform_version,
|
52
|
+
source_file,
|
53
|
+
line_number
|
54
|
+
)
|
55
|
+
values (#{Array.new(18, '?').join(', ')})")
|
51
56
|
|
52
57
|
parser = ApacheLogLineParser.new
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
58
|
+
|
59
|
+
streams.each do |stream|
|
60
|
+
stream.readlines.each_with_index do |line, line_number|
|
61
|
+
begin
|
62
|
+
hash = parser.parse line
|
63
|
+
ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
|
64
|
+
ins.execute(
|
65
|
+
DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
|
66
|
+
hash[:ip],
|
67
|
+
hash[:userid],
|
68
|
+
unique_visitor_id(hash),
|
69
|
+
hash[:method],
|
70
|
+
hash[:url],
|
71
|
+
(hash[:url] ? File.extname(hash[:url]) : ''),
|
72
|
+
hash[:status],
|
73
|
+
hash[:size].to_i,
|
74
|
+
hash[:referer],
|
75
|
+
hash[:user_agent],
|
76
|
+
ua.bot? ? 1 : 0,
|
77
|
+
(ua.name || ''),
|
78
|
+
(ua.version || ''),
|
79
|
+
(ua.platform.name || ''),
|
80
|
+
(ua.platform.version || ''),
|
81
|
+
stream == $stdin ? "stdin" : stream.path,
|
82
|
+
line_number
|
83
|
+
)
|
84
|
+
rescue StandardError => e
|
85
|
+
$stderr.puts e.message
|
86
|
+
end
|
78
87
|
end
|
79
88
|
end
|
80
|
-
|
89
|
+
|
81
90
|
db
|
82
91
|
end
|
83
92
|
|
84
93
|
def self.unique_visitor_id hash
|
85
94
|
"#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
|
86
95
|
end
|
87
|
-
|
88
96
|
end
|
89
97
|
end
|