log_sense 1.3.4 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +35 -0
- data/README.org +5 -4
- data/exe/log_sense +13 -15
- data/lib/log_sense/apache_data_cruncher.rb +14 -12
- data/lib/log_sense/apache_log_line_parser.rb +11 -12
- data/lib/log_sense/apache_log_parser.rb +44 -36
- data/lib/log_sense/emitter.rb +464 -11
- data/lib/log_sense/options_parser.rb +30 -30
- data/lib/log_sense/rails_data_cruncher.rb +5 -4
- data/lib/log_sense/rails_log_parser.rb +108 -100
- data/lib/log_sense/templates/_command_invocation.html.erb +0 -4
- data/lib/log_sense/templates/_command_invocation.txt.erb +4 -3
- data/lib/log_sense/templates/_output_table.html.erb +2 -7
- data/lib/log_sense/templates/_output_table.txt.erb +13 -0
- data/lib/log_sense/templates/_performance.html.erb +1 -1
- data/lib/log_sense/templates/_performance.txt.erb +8 -5
- data/lib/log_sense/templates/_report_data.html.erb +2 -2
- data/lib/log_sense/templates/_summary.html.erb +6 -1
- data/lib/log_sense/templates/_summary.txt.erb +11 -8
- data/lib/log_sense/templates/apache.html.erb +24 -225
- data/lib/log_sense/templates/apache.txt.erb +35 -0
- data/lib/log_sense/templates/rails.html.erb +0 -120
- data/lib/log_sense/templates/rails.txt.erb +8 -57
- data/lib/log_sense/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ef1d35932ba8c7fe6e636f6cb507ea99fd6b6026170d62207dfa2606d62bbcb
|
4
|
+
data.tar.gz: 725dc6e51ace6c9cbd366e3888387ef8f246327c9e1036c66d2eb6351b1f0791
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 592e80cd56f4740cc4003b494c9da960de228025377d630d070a08a495cffac7fd41850d469189510def48f9940700251bd84a141def9b1b15b32fbed967261f
|
7
|
+
data.tar.gz: 1d802e95fd58c66c4904843478c5ccc8ffc1ce43e87dd199e5b21efc89bcff436b227c646d2e5a386bee22a3d1c67c0341c25fcb4058e48526560d05984f0148
|
data/CHANGELOG.org
CHANGED
@@ -2,6 +2,41 @@
|
|
2
2
|
#+AUTHOR: Adolfo Villafiorita
|
3
3
|
#+STARTUP: showall
|
4
4
|
|
5
|
+
* 1.4.1
|
6
|
+
|
7
|
+
- [User] New textual report for Apache
|
8
|
+
- [User] New option -w sets maximum width of URL, Path, and
|
9
|
+
Description columns in textual reports
|
10
|
+
- [User] Removed option -i, since input filenames are now taken
|
11
|
+
as direct arguments
|
12
|
+
- [User] Allow multiple files in input
|
13
|
+
- [Fixed] Complain if input format is not supported
|
14
|
+
- [Code] Refactoring of reports to manage better output to
|
15
|
+
multiple formats
|
16
|
+
|
17
|
+
* 1.4.0
|
18
|
+
|
19
|
+
- [User] The Apache Log report now organizes page requests in four
|
20
|
+
tables:
|
21
|
+
- success on HTML pages
|
22
|
+
- success on other resources
|
23
|
+
- failures on HTML pages
|
24
|
+
- failures on other resources
|
25
|
+
- [User] Increased the default limit of pages in reports to 900
|
26
|
+
- [User] The return status is now included in the page and resources
|
27
|
+
reports
|
28
|
+
- [User] The "Attack" table has been removed, since the data can be
|
29
|
+
gotten from the previous tables
|
30
|
+
- [Fixed] HTML pages are those with extension ".html" and ".htm"
|
31
|
+
- [Fixed] Wrong data on summary table of the apache report has
|
32
|
+
been fixed
|
33
|
+
- [Fixed] Better JavaScript escaping to avoid log poisoning
|
34
|
+
- [Fixed] Strengthened the Apache log parser
|
35
|
+
|
36
|
+
* 1.3.3 and 1.3.4
|
37
|
+
|
38
|
+
- [Gem] Moved repository to Github and fixes to gemspec
|
39
|
+
|
5
40
|
* 1.3.2
|
6
41
|
|
7
42
|
- [Code] HTML reports now generate JSON data which is shared between
|
data/README.org
CHANGED
@@ -62,21 +62,21 @@ generated files are then made available on a private area on the web.
|
|
62
62
|
|
63
63
|
#+RESULTS:
|
64
64
|
#+begin_example
|
65
|
-
Usage: log_sense [options] [logfile]
|
65
|
+
Usage: log_sense [options] [logfile ...]
|
66
66
|
--title=TITLE Title to use in the report
|
67
67
|
-f, --input-format=FORMAT Input format (either rails or apache)
|
68
|
-
-i, --input-file=INPUT_FILE Input file
|
69
68
|
-t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
|
70
69
|
-o, --output-file=OUTPUT_FILE Output file
|
71
70
|
-b, --begin=DATE Consider entries after or on DATE
|
72
71
|
-e, --end=DATE Consider entries before or on DATE
|
73
|
-
-l, --limit=N
|
72
|
+
-l, --limit=N Limit to the N most requested resources (defaults to 900)
|
73
|
+
-w, --width=WIDTH Maximum width of URL and description columns in text reports
|
74
74
|
-c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
|
75
75
|
-n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
|
76
76
|
-v, --version Prints version information
|
77
77
|
-h, --help Prints this help
|
78
78
|
|
79
|
-
This is version 1.
|
79
|
+
This is version 1.4.1
|
80
80
|
|
81
81
|
Output formats
|
82
82
|
rails parsing can produce the following outputs:
|
@@ -85,6 +85,7 @@ generated files are then made available on a private area on the web.
|
|
85
85
|
- html
|
86
86
|
apache parsing can produce the following outputs:
|
87
87
|
- sqlite
|
88
|
+
- txt
|
88
89
|
- html
|
89
90
|
#+end_example
|
90
91
|
|
data/exe/log_sense
CHANGED
@@ -7,21 +7,15 @@ require 'log_sense.rb'
|
|
7
7
|
#
|
8
8
|
|
9
9
|
# this better be here... OptionsParser consumes ARGV
|
10
|
-
@command_line = ARGV.join(
|
11
|
-
|
10
|
+
@command_line = ARGV.join(' ')
|
12
11
|
@options = LogSense::OptionsParser.parse ARGV
|
13
|
-
@input_file = @options[:input_file] || ARGV[0]
|
14
12
|
@output_file = @options[:output_file]
|
15
13
|
|
16
|
-
if
|
17
|
-
puts "Error:
|
18
|
-
exit
|
19
|
-
end
|
20
|
-
|
21
|
-
if not File.exist? @input_file
|
22
|
-
puts "Error: input file '#{@input_file}' does not exist"
|
14
|
+
if ARGV.map { |x| File.exist?(x) }.include?(false)
|
15
|
+
warn.puts "Error: input file(s) '#{ARGV.reject { |x| File.exist(x) }.join(', ')}' do not exist"
|
23
16
|
exit 1
|
24
17
|
end
|
18
|
+
@input_files = ARGV.empty? ? [$stdin] : ARGV.map { |x| File.open(x, 'r') }
|
25
19
|
|
26
20
|
#
|
27
21
|
# Parse Log and Track Statistics
|
@@ -36,12 +30,15 @@ when 'apache'
|
|
36
30
|
when 'rails'
|
37
31
|
parser_klass = LogSense::RailsLogParser
|
38
32
|
cruncher_klass = LogSense::RailsDataCruncher
|
33
|
+
else
|
34
|
+
warn.puts "Error: input format #{@options[:input_format]} not understood."
|
35
|
+
exit 1
|
39
36
|
end
|
40
37
|
|
41
|
-
@db = parser_klass.parse @
|
38
|
+
@db = parser_klass.parse @input_files
|
42
39
|
|
43
|
-
if @options[:output_format]
|
44
|
-
ddb = SQLite3::Database.new(@output_file ||
|
40
|
+
if @options[:output_format] == 'sqlite'
|
41
|
+
ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
|
45
42
|
b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
|
46
43
|
b.step(-1) #=> DONE
|
47
44
|
b.finish
|
@@ -54,10 +51,11 @@ else
|
|
54
51
|
|
55
52
|
@data = @data.merge({
|
56
53
|
command: @command_line,
|
57
|
-
|
54
|
+
log_files: @input_files,
|
58
55
|
started_at: @started_at,
|
59
56
|
ended_at: @ended_at,
|
60
|
-
duration: @duration
|
57
|
+
duration: @duration,
|
58
|
+
width: @options[:width]
|
61
59
|
})
|
62
60
|
|
63
61
|
#
|
@@ -6,7 +6,7 @@ module LogSense
|
|
6
6
|
# @ variables are automatically put in the returned data
|
7
7
|
#
|
8
8
|
|
9
|
-
def self.crunch db, options = { limit:
|
9
|
+
def self.crunch db, options = { limit: 900 }
|
10
10
|
first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
|
11
11
|
last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
|
12
12
|
|
@@ -15,9 +15,9 @@ module LogSense
|
|
15
15
|
@last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
|
16
16
|
|
17
17
|
@total_days = 0
|
18
|
-
if @first_day
|
19
|
-
|
20
|
-
|
18
|
+
@total_days = (@last_day - @first_day).to_i if @first_day && @last_day
|
19
|
+
|
20
|
+
@source_files = db.execute "SELECT distinct(source_file) from LogLine"
|
21
21
|
|
22
22
|
@log_size = db.execute "SELECT count(datetime) from LogLine"
|
23
23
|
@log_size = @log_size[0][0]
|
@@ -89,16 +89,18 @@ module LogSense
|
|
89
89
|
|
90
90
|
@daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
|
91
91
|
@time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
|
92
|
-
@most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
93
|
-
@most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
94
|
-
@missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
95
|
-
@missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
96
92
|
|
97
|
-
|
98
|
-
|
99
|
-
|
93
|
+
good_statuses = "(status like '2%' or status like '3%')"
|
94
|
+
bad_statuses = "(status like '4%' or status like '5%')"
|
95
|
+
html_page = "(extension like '.htm%')"
|
96
|
+
non_html_page = "(extension not like '.htm%')"
|
97
|
+
|
98
|
+
@most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
99
|
+
@most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
100
|
+
|
101
|
+
@missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
102
|
+
@missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
|
100
103
|
|
101
|
-
@attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
|
102
104
|
@statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
|
103
105
|
|
104
106
|
@by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
|
@@ -31,22 +31,21 @@ module LogSense
|
|
31
31
|
|
32
32
|
TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
|
33
33
|
|
34
|
-
HTTP_METHODS
|
35
|
-
WEBDAV_METHODS
|
36
|
-
OTHER_METHODS
|
37
|
-
METHOD
|
38
|
-
PROTOCOL
|
39
|
-
URL
|
40
|
-
REFERER
|
41
|
-
RETURN_CODE
|
42
|
-
SIZE
|
43
|
-
|
44
|
-
USER_AGENT = /(?<user_agent>[^"]+)/
|
34
|
+
HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
|
35
|
+
WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
|
36
|
+
OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
|
37
|
+
METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
|
38
|
+
PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
|
39
|
+
URL = /(?<url>[^ ]+)/
|
40
|
+
REFERER = /(?<referer>[^"]*)/
|
41
|
+
RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
|
42
|
+
SIZE = /(?<size>[0-9]+|-)/
|
43
|
+
USER_AGENT = /(?<user_agent>[^"]*)/
|
45
44
|
|
46
45
|
attr_reader :format
|
47
46
|
|
48
47
|
def initialize
|
49
|
-
@format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
|
48
|
+
@format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
|
50
49
|
end
|
51
50
|
|
52
51
|
def parse line
|
@@ -7,10 +7,9 @@ module LogSense
|
|
7
7
|
# parse an Apache log file and return a SQLite3 DB
|
8
8
|
#
|
9
9
|
|
10
|
-
def self.parse
|
11
|
-
|
10
|
+
def self.parse(streams, options = {})
|
11
|
+
db = SQLite3::Database.new ':memory:'
|
12
12
|
|
13
|
-
db = SQLite3::Database.new ":memory:"
|
14
13
|
db.execute "CREATE TABLE IF NOT EXISTS LogLine(
|
15
14
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
16
15
|
datetime TEXT,
|
@@ -28,15 +27,18 @@ module LogSense
|
|
28
27
|
browser TEXT,
|
29
28
|
browser_version TEXT,
|
30
29
|
platform TEXT,
|
31
|
-
platform_version TEXT
|
30
|
+
platform_version TEXT,
|
31
|
+
source_file TEXT,
|
32
|
+
line_number INTEGER
|
33
|
+
)"
|
32
34
|
|
33
|
-
ins = db.prepare(
|
34
|
-
datetime,
|
35
|
+
ins = db.prepare("insert into LogLine (
|
36
|
+
datetime,
|
35
37
|
ip,
|
36
38
|
user,
|
37
39
|
unique_visitor,
|
38
40
|
method,
|
39
|
-
path,
|
41
|
+
path,
|
40
42
|
extension,
|
41
43
|
status,
|
42
44
|
size,
|
@@ -46,44 +48,50 @@ module LogSense
|
|
46
48
|
browser,
|
47
49
|
browser_version,
|
48
50
|
platform,
|
49
|
-
platform_version
|
50
|
-
|
51
|
+
platform_version,
|
52
|
+
source_file,
|
53
|
+
line_number
|
54
|
+
)
|
55
|
+
values (#{Array.new(18, '?').join(', ')})")
|
51
56
|
|
52
57
|
parser = ApacheLogLineParser.new
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
58
|
+
|
59
|
+
streams.each do |stream|
|
60
|
+
stream.readlines.each_with_index do |line, line_number|
|
61
|
+
begin
|
62
|
+
hash = parser.parse line
|
63
|
+
ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
|
64
|
+
ins.execute(
|
65
|
+
DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
|
66
|
+
hash[:ip],
|
67
|
+
hash[:userid],
|
68
|
+
unique_visitor_id(hash),
|
69
|
+
hash[:method],
|
70
|
+
hash[:url],
|
71
|
+
(hash[:url] ? File.extname(hash[:url]) : ''),
|
72
|
+
hash[:status],
|
73
|
+
hash[:size].to_i,
|
74
|
+
hash[:referer],
|
75
|
+
hash[:user_agent],
|
76
|
+
ua.bot? ? 1 : 0,
|
77
|
+
(ua.name || ''),
|
78
|
+
(ua.version || ''),
|
79
|
+
(ua.platform.name || ''),
|
80
|
+
(ua.platform.version || ''),
|
81
|
+
stream == $stdin ? "stdin" : stream.path,
|
82
|
+
line_number
|
83
|
+
)
|
84
|
+
rescue StandardError => e
|
85
|
+
warn.puts e.message
|
86
|
+
end
|
78
87
|
end
|
79
88
|
end
|
80
|
-
|
89
|
+
|
81
90
|
db
|
82
91
|
end
|
83
92
|
|
84
93
|
def self.unique_visitor_id hash
|
85
94
|
"#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
|
86
95
|
end
|
87
|
-
|
88
96
|
end
|
89
97
|
end
|