log_sense 1.3.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +46 -0
- data/Gemfile.lock +4 -4
- data/README.org +24 -10
- data/Rakefile +17 -3
- data/exe/log_sense +24 -16
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache_data_cruncher.rb +30 -30
- data/lib/log_sense/apache_log_line_parser.rb +12 -13
- data/lib/log_sense/apache_log_parser.rb +44 -36
- data/lib/log_sense/emitter.rb +518 -15
- data/lib/log_sense/ip_locator.rb +26 -19
- data/lib/log_sense/options_parser.rb +35 -30
- data/lib/log_sense/rails_data_cruncher.rb +8 -4
- data/lib/log_sense/rails_log_parser.rb +108 -100
- data/lib/log_sense/templates/_command_invocation.html.erb +0 -4
- data/lib/log_sense/templates/_command_invocation.txt.erb +4 -3
- data/lib/log_sense/templates/_navigation.html.erb +21 -0
- data/lib/log_sense/templates/_output_table.html.erb +2 -7
- data/lib/log_sense/templates/_output_table.txt.erb +14 -0
- data/lib/log_sense/templates/_performance.html.erb +1 -1
- data/lib/log_sense/templates/_performance.txt.erb +8 -5
- data/lib/log_sense/templates/_report_data.html.erb +2 -2
- data/lib/log_sense/templates/_summary.html.erb +6 -1
- data/lib/log_sense/templates/_summary.txt.erb +11 -8
- data/lib/log_sense/templates/_warning.txt.erb +1 -0
- data/lib/log_sense/templates/apache.html.erb +14 -335
- data/lib/log_sense/templates/apache.txt.erb +22 -0
- data/lib/log_sense/templates/rails.html.erb +13 -174
- data/lib/log_sense/templates/rails.txt.erb +10 -60
- data/lib/log_sense/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0128717b2ba709bc5dfbb7b755762a757c575ad4307159910cc894aaa3b88f42'
+  data.tar.gz: e05054b8eee79a439f5b077e60bc0d95a3e7706c550853333d7d631c458abf91
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9d9e3dc495f7479292ae96d1bf6298f531258cae74df47ff705aea9613880b0d50aa6a19328f70685484ad1c606bd12a8fb7c87632fe5c9cbefefe4893d9bb4d
+  data.tar.gz: b417049bcc119ed82ab4c33d007e15804ba85b2485308ca28448fba512814fc5e8b8b215310bfb5c8108fcdc87292322eefc6826b18718fcbc7fced29eea77cb
data/CHANGELOG.org
CHANGED
@@ -2,6 +2,52 @@
 #+AUTHOR: Adolfo Villafiorita
 #+STARTUP: showall

+* 1.5.0
+
+- [User] Present Unique Visits / day as integer
+- [User] Added Country and Streaks report for rails
+- [User] Changed Streak report in Apache
+
+- [Gem] Updated DBIP
+- [Gem] Updated Bundle
+
+- [Code] Refactored all reports, so that they are specified
+  in the same way
+- [Code] Refactor warning message in textual reports
+- [Code] Build HTML menu for report specification
+- [Code] Various refactoring passes on the code
+
+* 1.4.1
+
+- [User] New textual report for Apache
+- [User] New option -w sets maximum width of URL, Path, and
+  Description columns in textual reports
+- [User] Removed option -i, since input filenames are now taken
+  as direct arguments
+- [User] Allow multiple files in input
+- [Fixed] Complain if input format is not supported
+- [Code] Refactoring of reports to manage better output to
+  multiple formats
+
+* 1.4.0
+
+- [User] The Apache Log report now organizes page requests in four
+  tables:
+  - success on HTML pages
+  - success on other resources
+  - failures on HTML pages
+  - failures on other resources
+- [User] Increased the default limit of pages in reports to 900
+- [User] The return status in now included in the page and resources
+  reports
+- [User] The "Attack" table has been removed, since the data can be
+  gotten from the previous tables
+- [Fixed] HTML pages are those with extension ".html" and ".htm"
+- [Fixed] Wrong data on summary table of the apache report has
+  been fixed
+- [Fixed] Better JavaScript escaping to avoid log poisoning
+- [Fixed] Strengthened the Apache log parser
+
 * 1.3.3 and 1.3.4

 - [Gem] Moved repository to Github and fixes to gemspec
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    log_sense (1.
+    log_sense (1.4.2)
       browser
       ipaddr
       iso_country_codes
@@ -13,9 +13,9 @@ GEM
   specs:
     browser (5.3.1)
     byebug (11.1.3)
-    ipaddr (1.2.
+    ipaddr (1.2.4)
     iso_country_codes (0.7.8)
-    minitest (5.
+    minitest (5.15.0)
     rake (12.3.3)
     sqlite3 (1.4.2)
     terminal-table (3.0.2)
@@ -32,4 +32,4 @@ DEPENDENCIES
   rake (~> 12.0)

 BUNDLED WITH
-   2.
+   2.3.3
data/README.org
CHANGED
@@ -19,8 +19,6 @@ LogSense reports the following data:
 - OS, browsers, and devices
 - IP Country location, thanks to the DPIP lite country DB
 - Streaks: resources accessed by a given IP over time
-- Potential attacks: access to resources which are not meant to be
-  served by a web server serving static websites
 - Performance of Rails requests

 Filters from the command line allow to analyze specific periods and
@@ -33,6 +31,18 @@ And, of course, the compulsory screenshot:
 #+ATTR_HTML: :width 80%
 [[file:./apache-screenshot.png]]

+
+* An important word of warning
+
+[[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with invalidated
+user input to forge log entries or inject malicious content into the logs.
+
+log_sense sanitizes entries of HTML reports, to try and protect from log
+poisoning. *Log entries and URLs in SQLite3, however, are not sanitized*:
+they are stored and read from the log. This is not, in general, an issue,
+unless you use the data from SQLite in environments in which URLs can be
+opened or code executed.
+
 * Motivation

 LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
@@ -54,6 +64,7 @@ generated files are then made available on a private area on the web.
 gem install log_sense
 #+end_src

+
 * Usage

 #+begin_src bash :results raw output :wrap example
@@ -62,21 +73,22 @@ generated files are then made available on a private area on the web.

 #+RESULTS:
 #+begin_example
-Usage: log_sense [options] [logfile]
+Usage: log_sense [options] [logfile ...]
 --title=TITLE Title to use in the report
 -f, --input-format=FORMAT Input format (either rails or apache)
--i, --input-file=INPUT_FILE Input file
 -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
 -o, --output-file=OUTPUT_FILE Output file
 -b, --begin=DATE Consider entries after or on DATE
 -e, --end=DATE Consider entries before or on DATE
--l, --limit=N
+-l, --limit=N Limit to the N most requested resources (defaults to 900)
+-w, --width=WIDTH Maximum width of URL and description columns in text reports
 -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
 -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
+--verbose Inform about progress (prints to STDERR)
 -v, --version Prints version information
 -h, --help Prints this help

-This is version 1.
+This is version 1.5.0

 Output formats
 rails parsing can produce the following outputs:
@@ -85,6 +97,7 @@ generated files are then made available on a private area on the web.
 - html
 apache parsing can produce the following outputs:
 - sqlite
+- txt
 - html
 #+end_example

@@ -95,6 +108,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt
 log_sense -f rails -i production.log -t html -o performance.txt
 #+end_example

+
 * Change Log

 See the [[file:CHANGELOG.org][CHANGELOG]] file.
@@ -109,8 +123,8 @@ Concerning the outputs:
 - HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][Data Tables]], and [[https://vega.github.io/vega-lite/][Vega Light]], which
   are all downloaded from a CDN
 - The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
-  processed to any format [[https://orgmode.org/][Org Mode]] can be exported to
-  and PDF
+  processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
+  and PDF, with the word of warning in the section above.

 * Author and Contributors

@@ -118,8 +132,8 @@ Concerning the outputs:

 * Known Bugs

-No known bugs; an unknown number of unknown bugs.
-
+No known bugs; an unknown number of unknown bugs. (See the open issues for
+the known bugs.)

 * License

data/Rakefile
CHANGED
@@ -9,7 +9,21 @@ end
 require_relative './lib/log_sense/ip_locator.rb'

 desc "Convert Geolocation DB to sqlite"
-task :dbip_to_sqlite3, [:
-  filename = args[:
-
+task :dbip_to_sqlite3, [:year_month] do |tasks, args|
+  filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
+
+  if !File.exist? filename
+    puts "Error. Could not find: #{filename}"
+    puts
+    puts 'I see the following files:'
+    puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
+    puts ''
+    puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
+    puts '2. Save downloaded file to ip_locations/'
+    puts '3. Relaunch with YYYY-MM'
+
+    exit
+  else
+    LogSense::IpLocator::dbip_to_sqlite filename
+  end
 end
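Based on the task definition above, a typical invocation would be the following sketch (the YYYY-MM argument and the CSV path are illustrative):

    # after saving the DB-IP country lite CSV to ip_locations/dbip-country-lite-2021-12.csv
    rake "dbip_to_sqlite3[2021-12]"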
data/exe/log_sense
CHANGED
@@ -7,21 +7,15 @@ require 'log_sense.rb'
 #

 # this better be here... OptionsParser consumes ARGV
-@command_line = ARGV.join(
-
+@command_line = ARGV.join(' ')
 @options = LogSense::OptionsParser.parse ARGV
-@input_file = @options[:input_file] || ARGV[0]
 @output_file = @options[:output_file]

-if
-  puts "Error:
-  exit
-end
-
-if not File.exist? @input_file
-  puts "Error: input file '#{@input_file}' does not exist"
+if ARGV.map { |x| File.exist?(x) }.include?(false)
+  $stderr.puts "Error: input file(s) '#{ARGV.reject { |x| File.exist(x) }.join(', ')}' do not exist"
   exit 1
 end
+@input_files = ARGV.empty? ? [$stdin] : ARGV.map { |x| File.open(x, 'r') }

 #
 # Parse Log and Track Statistics
@@ -36,32 +30,46 @@ when 'apache'
 when 'rails'
   parser_klass = LogSense::RailsLogParser
   cruncher_klass = LogSense::RailsDataCruncher
+else
+  $stderr.puts "Error: input format #{@options[:input_format]} not understood."
+  exit 1
 end

-
+$stderr.puts "Parsing input files..." if @options[:verbose]
+@db = parser_klass.parse @input_files

-if @options[:output_format]
-
+if @options[:output_format] == 'sqlite'
+  $stderr.puts "Saving to SQLite3..." if @options[:verbose]
+  ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
   b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
   b.step(-1) #=> DONE
   b.finish
 else
+  $stderr.puts "Aggregating data..." if @options[:verbose]
   @data = cruncher_klass.crunch @db, @options
+
+  $stderr.puts "Geolocating..." if @options[:verbose]
   @data = LogSense::IpLocator.geolocate @data

+  $stderr.puts "Grouping by country..." if @options[:verbose]
+  country_col = @data[:ips][0].size - 1
+  @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
+
   @ended_at = Time.now
   @duration = @ended_at - @started_at

   @data = @data.merge({
     command: @command_line,
-
+    filenames: ARGV,
+    log_files: @input_files,
     started_at: @started_at,
     ended_at: @ended_at,
-    duration: @duration
+    duration: @duration,
+    width: @options[:width]
   })
-
 #
 # Emit Output
 #
+$stderr.puts "Emitting..." if @options[:verbose]
 puts LogSense::Emitter.emit @data, @options
 end
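With the change above, input files are plain positional arguments and progress messages go to STDERR when --verbose is given. A minimal sketch of an invocation (file names are illustrative; the flags are those listed in the README diff above):

    # parse two Apache logs and write the textual report to a file
    log_sense -f apache -t txt --verbose access.log access.log.1 > access-report.txt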
data/ip_locations/dbip-country-lite.sqlite3
CHANGED
Binary file (no diff shown)
data/lib/log_sense/apache_data_cruncher.rb
CHANGED
@@ -6,7 +6,7 @@ module LogSense
     # @ variables are automatically put in the returned data
     #

-    def self.crunch db, options = { limit:
+    def self.crunch db, options = { limit: 900 }
       first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
       last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"

@@ -15,17 +15,17 @@
       @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil

       @total_days = 0
-      if @first_day
-
-
+      @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
+
+      @source_files = db.execute 'SELECT distinct(source_file) from LogLine'

-      @log_size = db.execute
+      @log_size = db.execute 'SELECT count(datetime) from LogLine'
       @log_size = @log_size[0][0]

       @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
       @selfpolls_size = @selfpolls_size[0][0]

-      @crawlers_size = db.execute
+      @crawlers_size = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
       @crawlers_size = @crawlers_size[0][0]

       @first_day_requested = options[:from_date]
@@ -35,7 +35,7 @@
       @last_day_in_analysis = date_intersect options[:to_date], @last_day, :min

       @total_days_in_analysis = 0
-      if @first_day_in_analysis
+      if @first_day_in_analysis && @last_day_in_analysis
         @total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
       end

@@ -45,24 +45,24 @@
       filter = [
         (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
         (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
-        (options[:only_crawlers] ?
-        (options[:ignore_crawlers] ?
+        (options[:only_crawlers] ? 'bot == 1' : nil),
+        (options[:ignore_crawlers] ? 'bot == 0' : nil),
         (options[:no_selfpolls] ? "ip != '::1'" : nil),
-
+        'true'
       ].compact.join " and "

       mega = 1024 * 1024
       giga = mega * 1024
       tera = giga * 1024
-
+
       # in alternative to sum(size)
       human_readable_size = <<-EOS
-      CASE
+      CASE
         WHEN sum(size) < 1024 THEN sum(size) || ' B'
         WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
         WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
         WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
-        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
+        WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
       END AS size
       EOS

@@ -89,16 +89,18 @@

       @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
       @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
-      @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
-      @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
-      @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
-      @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"

-
-
-
+      good_statuses = "(status like '2%' or status like '3%')"
+      bad_statuses = "(status like '4%' or status like '5%')"
+      html_page = "(extension like '.htm%')"
+      non_html_page = "(extension not like '.htm%')"
+
+      @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
+      @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
+
+      @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
+      @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"

-      @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
       @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

       @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
@@ -115,20 +117,19 @@

       @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"

-      @streaks = db.execute
+      @streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
       data = {}

-
-      var_as_symbol = variable.to_s[1
-      data[var_as_symbol] =
+      instance_variables.each do |variable|
+        var_as_symbol = variable.to_s[1..].to_sym
+        data[var_as_symbol] = instance_variable_get(variable)
       end
+
       data
     end

-
-
-    def self.date_intersect date1, date2, method
-      if date1 and date2
+    def self.date_intersect(date1, date2, method)
+      if date1 && date2
         [date1, date2].send(method)
       elsif date1
         date1
@@ -138,4 +139,3 @@
     end
   end
 end
-
data/lib/log_sense/apache_log_line_parser.rb
CHANGED
@@ -31,22 +31,21 @@

     TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/

-    HTTP_METHODS
-    WEBDAV_METHODS
-    OTHER_METHODS
-    METHOD
-    PROTOCOL
-    URL
-    REFERER
-    RETURN_CODE
-    SIZE
-
-    USER_AGENT = /(?<user_agent>[^"]+)/
+    HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
+    WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
+    OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
+    METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
+    PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
+    URL = /(?<url>[^ ]+)/
+    REFERER = /(?<referer>[^"]*)/
+    RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
+    SIZE = /(?<size>[0-9]+|-)/
+    USER_AGENT = /(?<user_agent>[^"]*)/

     attr_reader :format

-    def initialize
-      @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
+    def initialize
+      @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
     end

     def parse line
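A minimal sketch of how the strengthened format could be exercised on a combined-format line (the sample line is invented, and the return value of parse is assumed to expose the named captures, which is how apache_log_parser.rb below indexes it):

    require 'log_sense'

    # invented example line in Apache combined log format
    line = '203.0.113.7 - - [10/Dec/2021:10:20:30 +0000] ' \
           '"GET /index.html HTTP/1.1" 200 5120 "-" "Mozilla/5.0 (X11; Linux x86_64)"'

    parser = LogSense::ApacheLogLineParser.new
    hash = parser.parse(line)
    hash[:ip]     # => "203.0.113.7"
    hash[:status] # => "200"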
data/lib/log_sense/apache_log_parser.rb
CHANGED
@@ -7,10 +7,9 @@
     # parse an Apache log file and return a SQLite3 DB
     #

-    def self.parse
-
+    def self.parse(streams, options = {})
+      db = SQLite3::Database.new ':memory:'

-      db = SQLite3::Database.new ":memory:"
       db.execute "CREATE TABLE IF NOT EXISTS LogLine(
         id INTEGER PRIMARY KEY AUTOINCREMENT,
         datetime TEXT,
@@ -28,15 +27,18 @@
         browser TEXT,
         browser_version TEXT,
         platform TEXT,
-        platform_version TEXT
+        platform_version TEXT,
+        source_file TEXT,
+        line_number INTEGER
+      )"

-      ins = db.prepare(
-        datetime,
+      ins = db.prepare("insert into LogLine (
+        datetime,
         ip,
         user,
         unique_visitor,
         method,
-        path,
+        path,
         extension,
         status,
         size,
@@ -46,44 +48,50 @@
         browser,
         browser_version,
         platform,
-        platform_version
-
+        platform_version,
+        source_file,
+        line_number
+      )
+      values (#{Array.new(18, '?').join(', ')})")

       parser = ApacheLogLineParser.new
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+      streams.each do |stream|
+        stream.readlines.each_with_index do |line, line_number|
+          begin
+            hash = parser.parse line
+            ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
+            ins.execute(
+              DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
+              hash[:ip],
+              hash[:userid],
+              unique_visitor_id(hash),
+              hash[:method],
+              hash[:url],
+              (hash[:url] ? File.extname(hash[:url]) : ''),
+              hash[:status],
+              hash[:size].to_i,
+              hash[:referer],
+              hash[:user_agent],
+              ua.bot? ? 1 : 0,
+              (ua.name || ''),
+              (ua.version || ''),
+              (ua.platform.name || ''),
+              (ua.platform.version || ''),
+              stream == $stdin ? "stdin" : stream.path,
+              line_number
+            )
+          rescue StandardError => e
+            $stderr.puts e.message
+          end
         end
       end
-
+
       db
     end

     def self.unique_visitor_id hash
       "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
     end
-
   end
 end
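Given the new signature, parsing is driven by a list of already-opened streams; a minimal sketch (file names are illustrative), assuming the returned value is the in-memory SQLite3 database built above:

    require 'log_sense'

    # open the logs and build the LogLine table in memory
    streams = ['access.log', 'access.log.1'].map { |f| File.open(f, 'r') }
    db = LogSense::ApacheLogParser.parse(streams)
    db.execute('SELECT count(*) FROM LogLine')  # number of parsed lines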