log_sense 1.3.4 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8250028b7a7038e07f2bd9fa069bfdf0ab1815a8711a0b6971a91030d529882
4
- data.tar.gz: 236e1ba2c3ada9272f1e7eaa649cd1664776da933a90e41b6e70fffbbe002043
3
+ metadata.gz: 6ef1d35932ba8c7fe6e636f6cb507ea99fd6b6026170d62207dfa2606d62bbcb
4
+ data.tar.gz: 725dc6e51ace6c9cbd366e3888387ef8f246327c9e1036c66d2eb6351b1f0791
5
5
  SHA512:
6
- metadata.gz: 72b6284cd6f09ebf16c4fe34fc7098da44e993a2946fb09f72ccaa7898eb462b9fcb49ce2e37e06b206a3f00650f8a033ec034ffeeab3afe587b6b607d079302
7
- data.tar.gz: 6b96590430caa4e9717180c1812b0227b661f1fef4c99e10515da6da8acbe8fd596b91a9e0d536eff2ce525ba1fc01f4819a5c3f649bb59f4c77754edbbd5b0c
6
+ metadata.gz: 592e80cd56f4740cc4003b494c9da960de228025377d630d070a08a495cffac7fd41850d469189510def48f9940700251bd84a141def9b1b15b32fbed967261f
7
+ data.tar.gz: 1d802e95fd58c66c4904843478c5ccc8ffc1ce43e87dd199e5b21efc89bcff436b227c646d2e5a386bee22a3d1c67c0341c25fcb4058e48526560d05984f0148
data/CHANGELOG.org CHANGED
@@ -2,6 +2,41 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.4.1
6
+
7
+ - [User] New textual report for Apache
8
+ - [User] New option -w sets maximum width of URL, Path, and
9
+ Description columns in textual reports
10
+ - [User] Removed option -i, since input filenames are now taken
11
+ as direct arguments
12
+ - [User] Allow multiple files in input
13
+ - [Fixed] Complain if input format is not supported
14
+ - [Code] Refactoring of reports to better manage output to
15
+ multiple formats
16
+
17
+ * 1.4.0
18
+
19
+ - [User] The Apache Log report now organizes page requests in four
20
+ tables:
21
+ - success on HTML pages
22
+ - success on other resources
23
+ - failures on HTML pages
24
+ - failures on other resources
25
+ - [User] Increased the default limit of pages in reports to 900
26
+ - [User] The return status is now included in the page and resources
27
+ reports
28
+ - [User] The "Attack" table has been removed, since the data can be
29
+ gotten from the previous tables
30
+ - [Fixed] HTML pages are those with extension ".html" and ".htm"
31
+ - [Fixed] Wrong data on summary table of the apache report has
32
+ been fixed
33
+ - [Fixed] Better JavaScript escaping to avoid log poisoning
34
+ - [Fixed] Strengthened the Apache log parser
35
+
36
+ * 1.3.3 and 1.3.4
37
+
38
+ - [Gem] Moved repository to Github and fixes to gemspec
39
+
5
40
  * 1.3.2
6
41
 
7
42
  - [Code] HTML reports now generate JSON data which is shared between
data/README.org CHANGED
@@ -62,21 +62,21 @@ generated files are then made available on a private area on the web.
62
62
 
63
63
  #+RESULTS:
64
64
  #+begin_example
65
- Usage: log_sense [options] [logfile]
65
+ Usage: log_sense [options] [logfile ...]
66
66
  --title=TITLE Title to use in the report
67
67
  -f, --input-format=FORMAT Input format (either rails or apache)
68
- -i, --input-file=INPUT_FILE Input file
69
68
  -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
70
69
  -o, --output-file=OUTPUT_FILE Output file
71
70
  -b, --begin=DATE Consider entries after or on DATE
72
71
  -e, --end=DATE Consider entries before or on DATE
73
- -l, --limit=N Number of entries to show (defaults to 30)
72
+ -l, --limit=N Limit to the N most requested resources (defaults to 900)
73
+ -w, --width=WIDTH Maximum width of URL and description columns in text reports
74
74
  -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
75
75
  -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
76
76
  -v, --version Prints version information
77
77
  -h, --help Prints this help
78
78
 
79
- This is version 1.3.1
79
+ This is version 1.4.1
80
80
 
81
81
  Output formats
82
82
  rails parsing can produce the following outputs:
@@ -85,6 +85,7 @@ generated files are then made available on a private area on the web.
85
85
  - html
86
86
  apache parsing can produce the following outputs:
87
87
  - sqlite
88
+ - txt
88
89
  - html
89
90
  #+end_example
90
91
 
data/exe/log_sense CHANGED
@@ -7,21 +7,15 @@ require 'log_sense.rb'
7
7
  #
8
8
 
9
9
  # this better be here... OptionsParser consumes ARGV
10
- @command_line = ARGV.join(" ")
11
-
10
+ @command_line = ARGV.join(' ')
12
11
  @options = LogSense::OptionsParser.parse ARGV
13
- @input_file = @options[:input_file] || ARGV[0]
14
12
  @output_file = @options[:output_file]
15
13
 
16
- if not @input_file
17
- puts "Error: no input file specified."
18
- exit
19
- end
20
-
21
- if not File.exist? @input_file
22
- puts "Error: input file '#{@input_file}' does not exist"
14
+ if ARGV.map { |x| File.exist?(x) }.include?(false)
15
+ warn.puts "Error: input file(s) '#{ARGV.reject { |x| File.exist(x) }.join(', ')}' do not exist"
23
16
  exit 1
24
17
  end
18
+ @input_files = ARGV.empty? ? [$stdin] : ARGV.map { |x| File.open(x, 'r') }
25
19
 
26
20
  #
27
21
  # Parse Log and Track Statistics
@@ -36,12 +30,15 @@ when 'apache'
36
30
  when 'rails'
37
31
  parser_klass = LogSense::RailsLogParser
38
32
  cruncher_klass = LogSense::RailsDataCruncher
33
+ else
34
+ warn.puts "Error: input format #{@options[:input_format]} not understood."
35
+ exit 1
39
36
  end
40
37
 
41
- @db = parser_klass.parse @input_file
38
+ @db = parser_klass.parse @input_files
42
39
 
43
- if @options[:output_format] == "sqlite"
44
- ddb = SQLite3::Database.new(@output_file || "db.sqlite3")
40
+ if @options[:output_format] == 'sqlite'
41
+ ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
45
42
  b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
46
43
  b.step(-1) #=> DONE
47
44
  b.finish
@@ -54,10 +51,11 @@ else
54
51
 
55
52
  @data = @data.merge({
56
53
  command: @command_line,
57
- log_file: @input_file,
54
+ log_files: @input_files,
58
55
  started_at: @started_at,
59
56
  ended_at: @ended_at,
60
- duration: @duration
57
+ duration: @duration,
58
+ width: @options[:width]
61
59
  })
62
60
 
63
61
  #
@@ -6,7 +6,7 @@ module LogSense
6
6
  # @ variables are automatically put in the returned data
7
7
  #
8
8
 
9
- def self.crunch db, options = { limit: 30 }
9
+ def self.crunch db, options = { limit: 900 }
10
10
  first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
11
11
  last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
12
12
 
@@ -15,9 +15,9 @@ module LogSense
15
15
  @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
16
16
 
17
17
  @total_days = 0
18
- if @first_day and @last_day
19
- @total_days = (@last_day - @first_day).to_i
20
- end
18
+ @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
19
+
20
+ @source_files = db.execute "SELECT distinct(source_file) from LogLine"
21
21
 
22
22
  @log_size = db.execute "SELECT count(datetime) from LogLine"
23
23
  @log_size = @log_size[0][0]
@@ -89,16 +89,18 @@ module LogSense
89
89
 
90
90
  @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
91
91
  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
92
- @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
93
- @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
94
- @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
95
- @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
96
92
 
97
- @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
98
- "extension != '#{x}'"
99
- }.join " and "
93
+ good_statuses = "(status like '2%' or status like '3%')"
94
+ bad_statuses = "(status like '4%' or status like '5%')"
95
+ html_page = "(extension like '.htm%')"
96
+ non_html_page = "(extension not like '.htm%')"
97
+
98
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
99
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
+
101
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
102
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
103
 
101
- @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
102
104
  @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
103
105
 
104
106
  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
@@ -31,22 +31,21 @@ module LogSense
31
31
 
32
32
  TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
33
33
 
34
- HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
- WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT/
37
- METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9])/
39
- URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^ ]+)/
41
- RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
- SIZE=/(?<size>[0-9]+|-)/
43
-
44
- USER_AGENT = /(?<user_agent>[^"]+)/
34
+ HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
+ WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
+ OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
+ METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
+ PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
+ URL = /(?<url>[^ ]+)/
40
+ REFERER = /(?<referer>[^"]*)/
41
+ RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
42
+ SIZE = /(?<size>[0-9]+|-)/
43
+ USER_AGENT = /(?<user_agent>[^"]*)/
45
44
 
46
45
  attr_reader :format
47
46
 
48
47
  def initialize
49
- @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
48
+ @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
50
49
  end
51
50
 
52
51
  def parse line
@@ -7,10 +7,9 @@ module LogSense
7
7
  # parse an Apache log file and return a SQLite3 DB
8
8
  #
9
9
 
10
- def self.parse filename, options = {}
11
- content = filename ? File.readlines(filename) : ARGF.readlines
10
+ def self.parse(streams, options = {})
11
+ db = SQLite3::Database.new ':memory:'
12
12
 
13
- db = SQLite3::Database.new ":memory:"
14
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
15
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
16
15
  datetime TEXT,
@@ -28,15 +27,18 @@ module LogSense
28
27
  browser TEXT,
29
28
  browser_version TEXT,
30
29
  platform TEXT,
31
- platform_version TEXT)"
30
+ platform_version TEXT,
31
+ source_file TEXT,
32
+ line_number INTEGER
33
+ )"
32
34
 
33
- ins = db.prepare('insert into LogLine (
34
- datetime,
35
+ ins = db.prepare("insert into LogLine (
36
+ datetime,
35
37
  ip,
36
38
  user,
37
39
  unique_visitor,
38
40
  method,
39
- path,
41
+ path,
40
42
  extension,
41
43
  status,
42
44
  size,
@@ -46,44 +48,50 @@ module LogSense
46
48
  browser,
47
49
  browser_version,
48
50
  platform,
49
- platform_version)
50
- values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
51
+ platform_version,
52
+ source_file,
53
+ line_number
54
+ )
55
+ values (#{Array.new(18, '?').join(', ')})")
51
56
 
52
57
  parser = ApacheLogLineParser.new
53
-
54
- content.each do |line|
55
- begin
56
- hash = parser.parse line
57
- ua = Browser.new(hash[:user_agent], accept_language: "en-us")
58
- ins.execute(
59
- DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
60
- hash[:ip],
61
- hash[:userid],
62
- unique_visitor_id(hash),
63
- hash[:method],
64
- hash[:url],
65
- (hash[:url] ? File.extname(hash[:url]) : ""),
66
- hash[:status],
67
- hash[:size].to_i,
68
- hash[:referer],
69
- hash[:user_agent],
70
- ua.bot? ? 1 : 0,
71
- (ua.name || ""),
72
- (ua.version || ""),
73
- (ua.platform.name || ""),
74
- (ua.platform.version || "")
75
- )
76
- rescue StandardError => e
77
- STDERR.puts e.message
58
+
59
+ streams.each do |stream|
60
+ stream.readlines.each_with_index do |line, line_number|
61
+ begin
62
+ hash = parser.parse line
63
+ ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
64
+ ins.execute(
65
+ DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
66
+ hash[:ip],
67
+ hash[:userid],
68
+ unique_visitor_id(hash),
69
+ hash[:method],
70
+ hash[:url],
71
+ (hash[:url] ? File.extname(hash[:url]) : ''),
72
+ hash[:status],
73
+ hash[:size].to_i,
74
+ hash[:referer],
75
+ hash[:user_agent],
76
+ ua.bot? ? 1 : 0,
77
+ (ua.name || ''),
78
+ (ua.version || ''),
79
+ (ua.platform.name || ''),
80
+ (ua.platform.version || ''),
81
+ stream == $stdin ? "stdin" : stream.path,
82
+ line_number
83
+ )
84
+ rescue StandardError => e
85
+ warn.puts e.message
86
+ end
78
87
  end
79
88
  end
80
-
89
+
81
90
  db
82
91
  end
83
92
 
84
93
  def self.unique_visitor_id hash
85
94
  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
86
95
  end
87
-
88
96
  end
89
97
  end