log_sense 1.3.4 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8250028b7a7038e07f2bd9fa069bfdf0ab1815a8711a0b6971a91030d529882
4
- data.tar.gz: 236e1ba2c3ada9272f1e7eaa649cd1664776da933a90e41b6e70fffbbe002043
3
+ metadata.gz: 6ef1d35932ba8c7fe6e636f6cb507ea99fd6b6026170d62207dfa2606d62bbcb
4
+ data.tar.gz: 725dc6e51ace6c9cbd366e3888387ef8f246327c9e1036c66d2eb6351b1f0791
5
5
  SHA512:
6
- metadata.gz: 72b6284cd6f09ebf16c4fe34fc7098da44e993a2946fb09f72ccaa7898eb462b9fcb49ce2e37e06b206a3f00650f8a033ec034ffeeab3afe587b6b607d079302
7
- data.tar.gz: 6b96590430caa4e9717180c1812b0227b661f1fef4c99e10515da6da8acbe8fd596b91a9e0d536eff2ce525ba1fc01f4819a5c3f649bb59f4c77754edbbd5b0c
6
+ metadata.gz: 592e80cd56f4740cc4003b494c9da960de228025377d630d070a08a495cffac7fd41850d469189510def48f9940700251bd84a141def9b1b15b32fbed967261f
7
+ data.tar.gz: 1d802e95fd58c66c4904843478c5ccc8ffc1ce43e87dd199e5b21efc89bcff436b227c646d2e5a386bee22a3d1c67c0341c25fcb4058e48526560d05984f0148
data/CHANGELOG.org CHANGED
@@ -2,6 +2,41 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.4.1
6
+
7
+ - [User] New textual report for Apache
8
+ - [User] New option -w sets maximum width of URL, Path, and
9
+ Description columns in textual reports
10
+ - [User] Removed option -i, since input filenames are now taken
11
+ as direct arguments
12
+ - [User] Allow multiple files in input
13
+ - [Fixed] Complain if input format is not supported
14
+ - [Code] Refactoring of reports to better manage output to
15
+ multiple formats
16
+
17
+ * 1.4.0
18
+
19
+ - [User] The Apache Log report now organizes page requests in four
20
+ tables:
21
+ - success on HTML pages
22
+ - success on other resources
23
+ - failures on HTML pages
24
+ - failures on other resources
25
+ - [User] Increased the default limit of pages in reports to 900
26
+ - [User] The return status is now included in the page and resources
27
+ reports
28
+ - [User] The "Attack" table has been removed, since the data can be
29
+ gotten from the previous tables
30
+ - [Fixed] HTML pages are those with extension ".html" and ".htm"
31
+ - [Fixed] Wrong data on summary table of the apache report has
32
+ been fixed
33
+ - [Fixed] Better JavaScript escaping to avoid log poisoning
34
+ - [Fixed] Strengthened the Apache log parser
35
+
36
+ * 1.3.3 and 1.3.4
37
+
38
+ - [Gem] Moved repository to Github and fixes to gemspec
39
+
5
40
  * 1.3.2
6
41
 
7
42
  - [Code] HTML reports now generate JSON data which is shared between
data/README.org CHANGED
@@ -62,21 +62,21 @@ generated files are then made available on a private area on the web.
62
62
 
63
63
  #+RESULTS:
64
64
  #+begin_example
65
- Usage: log_sense [options] [logfile]
65
+ Usage: log_sense [options] [logfile ...]
66
66
  --title=TITLE Title to use in the report
67
67
  -f, --input-format=FORMAT Input format (either rails or apache)
68
- -i, --input-file=INPUT_FILE Input file
69
68
  -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
70
69
  -o, --output-file=OUTPUT_FILE Output file
71
70
  -b, --begin=DATE Consider entries after or on DATE
72
71
  -e, --end=DATE Consider entries before or on DATE
73
- -l, --limit=N Number of entries to show (defaults to 30)
72
+ -l, --limit=N Limit to the N most requested resources (defaults to 900)
73
+ -w, --width=WIDTH Maximum width of URL and description columns in text reports
74
74
  -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
75
75
  -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
76
76
  -v, --version Prints version information
77
77
  -h, --help Prints this help
78
78
 
79
- This is version 1.3.1
79
+ This is version 1.4.1
80
80
 
81
81
  Output formats
82
82
  rails parsing can produce the following outputs:
@@ -85,6 +85,7 @@ generated files are then made available on a private area on the web.
85
85
  - html
86
86
  apache parsing can produce the following outputs:
87
87
  - sqlite
88
+ - txt
88
89
  - html
89
90
  #+end_example
90
91
 
data/exe/log_sense CHANGED
@@ -7,21 +7,15 @@ require 'log_sense.rb'
7
7
  #
8
8
 
9
9
  # this better be here... OptionsParser consumes ARGV
10
- @command_line = ARGV.join(" ")
11
-
10
+ @command_line = ARGV.join(' ')
12
11
  @options = LogSense::OptionsParser.parse ARGV
13
- @input_file = @options[:input_file] || ARGV[0]
14
12
  @output_file = @options[:output_file]
15
13
 
16
- if not @input_file
17
- puts "Error: no input file specified."
18
- exit
19
- end
20
-
21
- if not File.exist? @input_file
22
- puts "Error: input file '#{@input_file}' does not exist"
14
+ if ARGV.map { |x| File.exist?(x) }.include?(false)
15
+ warn.puts "Error: input file(s) '#{ARGV.reject { |x| File.exist(x) }.join(', ')}' do not exist"
23
16
  exit 1
24
17
  end
18
+ @input_files = ARGV.empty? ? [$stdin] : ARGV.map { |x| File.open(x, 'r') }
25
19
 
26
20
  #
27
21
  # Parse Log and Track Statistics
@@ -36,12 +30,15 @@ when 'apache'
36
30
  when 'rails'
37
31
  parser_klass = LogSense::RailsLogParser
38
32
  cruncher_klass = LogSense::RailsDataCruncher
33
+ else
34
+ warn.puts "Error: input format #{@options[:input_format]} not understood."
35
+ exit 1
39
36
  end
40
37
 
41
- @db = parser_klass.parse @input_file
38
+ @db = parser_klass.parse @input_files
42
39
 
43
- if @options[:output_format] == "sqlite"
44
- ddb = SQLite3::Database.new(@output_file || "db.sqlite3")
40
+ if @options[:output_format] == 'sqlite'
41
+ ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
45
42
  b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
46
43
  b.step(-1) #=> DONE
47
44
  b.finish
@@ -54,10 +51,11 @@ else
54
51
 
55
52
  @data = @data.merge({
56
53
  command: @command_line,
57
- log_file: @input_file,
54
+ log_files: @input_files,
58
55
  started_at: @started_at,
59
56
  ended_at: @ended_at,
60
- duration: @duration
57
+ duration: @duration,
58
+ width: @options[:width]
61
59
  })
62
60
 
63
61
  #
@@ -6,7 +6,7 @@ module LogSense
6
6
  # @ variables are automatically put in the returned data
7
7
  #
8
8
 
9
- def self.crunch db, options = { limit: 30 }
9
+ def self.crunch db, options = { limit: 900 }
10
10
  first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
11
11
  last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
12
12
 
@@ -15,9 +15,9 @@ module LogSense
15
15
  @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
16
16
 
17
17
  @total_days = 0
18
- if @first_day and @last_day
19
- @total_days = (@last_day - @first_day).to_i
20
- end
18
+ @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
19
+
20
+ @source_files = db.execute "SELECT distinct(source_file) from LogLine"
21
21
 
22
22
  @log_size = db.execute "SELECT count(datetime) from LogLine"
23
23
  @log_size = @log_size[0][0]
@@ -89,16 +89,18 @@ module LogSense
89
89
 
90
90
  @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
91
91
  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
92
- @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
93
- @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
94
- @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
95
- @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
96
92
 
97
- @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
98
- "extension != '#{x}'"
99
- }.join " and "
93
+ good_statuses = "(status like '2%' or status like '3%')"
94
+ bad_statuses = "(status like '4%' or status like '5%')"
95
+ html_page = "(extension like '.htm%')"
96
+ non_html_page = "(extension not like '.htm%')"
97
+
98
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
99
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
+
101
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
102
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
103
 
101
- @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
102
104
  @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
103
105
 
104
106
  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
@@ -31,22 +31,21 @@ module LogSense
31
31
 
32
32
  TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
33
33
 
34
- HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
- WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT/
37
- METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9])/
39
- URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^ ]+)/
41
- RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
- SIZE=/(?<size>[0-9]+|-)/
43
-
44
- USER_AGENT = /(?<user_agent>[^"]+)/
34
+ HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
+ WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
+ OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
+ METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
+ PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
+ URL = /(?<url>[^ ]+)/
40
+ REFERER = /(?<referer>[^"]*)/
41
+ RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
42
+ SIZE = /(?<size>[0-9]+|-)/
43
+ USER_AGENT = /(?<user_agent>[^"]*)/
45
44
 
46
45
  attr_reader :format
47
46
 
48
47
  def initialize
49
- @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
48
+ @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
50
49
  end
51
50
 
52
51
  def parse line
@@ -7,10 +7,9 @@ module LogSense
7
7
  # parse an Apache log file and return a SQLite3 DB
8
8
  #
9
9
 
10
- def self.parse filename, options = {}
11
- content = filename ? File.readlines(filename) : ARGF.readlines
10
+ def self.parse(streams, options = {})
11
+ db = SQLite3::Database.new ':memory:'
12
12
 
13
- db = SQLite3::Database.new ":memory:"
14
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
15
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
16
15
  datetime TEXT,
@@ -28,15 +27,18 @@ module LogSense
28
27
  browser TEXT,
29
28
  browser_version TEXT,
30
29
  platform TEXT,
31
- platform_version TEXT)"
30
+ platform_version TEXT,
31
+ source_file TEXT,
32
+ line_number INTEGER
33
+ )"
32
34
 
33
- ins = db.prepare('insert into LogLine (
34
- datetime,
35
+ ins = db.prepare("insert into LogLine (
36
+ datetime,
35
37
  ip,
36
38
  user,
37
39
  unique_visitor,
38
40
  method,
39
- path,
41
+ path,
40
42
  extension,
41
43
  status,
42
44
  size,
@@ -46,44 +48,50 @@ module LogSense
46
48
  browser,
47
49
  browser_version,
48
50
  platform,
49
- platform_version)
50
- values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
51
+ platform_version,
52
+ source_file,
53
+ line_number
54
+ )
55
+ values (#{Array.new(18, '?').join(', ')})")
51
56
 
52
57
  parser = ApacheLogLineParser.new
53
-
54
- content.each do |line|
55
- begin
56
- hash = parser.parse line
57
- ua = Browser.new(hash[:user_agent], accept_language: "en-us")
58
- ins.execute(
59
- DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
60
- hash[:ip],
61
- hash[:userid],
62
- unique_visitor_id(hash),
63
- hash[:method],
64
- hash[:url],
65
- (hash[:url] ? File.extname(hash[:url]) : ""),
66
- hash[:status],
67
- hash[:size].to_i,
68
- hash[:referer],
69
- hash[:user_agent],
70
- ua.bot? ? 1 : 0,
71
- (ua.name || ""),
72
- (ua.version || ""),
73
- (ua.platform.name || ""),
74
- (ua.platform.version || "")
75
- )
76
- rescue StandardError => e
77
- STDERR.puts e.message
58
+
59
+ streams.each do |stream|
60
+ stream.readlines.each_with_index do |line, line_number|
61
+ begin
62
+ hash = parser.parse line
63
+ ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
64
+ ins.execute(
65
+ DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
66
+ hash[:ip],
67
+ hash[:userid],
68
+ unique_visitor_id(hash),
69
+ hash[:method],
70
+ hash[:url],
71
+ (hash[:url] ? File.extname(hash[:url]) : ''),
72
+ hash[:status],
73
+ hash[:size].to_i,
74
+ hash[:referer],
75
+ hash[:user_agent],
76
+ ua.bot? ? 1 : 0,
77
+ (ua.name || ''),
78
+ (ua.version || ''),
79
+ (ua.platform.name || ''),
80
+ (ua.platform.version || ''),
81
+ stream == $stdin ? "stdin" : stream.path,
82
+ line_number
83
+ )
84
+ rescue StandardError => e
85
+ warn.puts e.message
86
+ end
78
87
  end
79
88
  end
80
-
89
+
81
90
  db
82
91
  end
83
92
 
84
93
  def self.unique_visitor_id hash
85
94
  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
86
95
  end
87
-
88
96
  end
89
97
  end