log_sense 1.4.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.org +34 -0
  3. data/Gemfile.lock +4 -4
  4. data/README.org +25 -10
  5. data/Rakefile +17 -3
  6. data/exe/log_sense +30 -15
  7. data/ip_locations/dbip-country-lite.sqlite3 +0 -0
  8. data/lib/log_sense/apache_data_cruncher.rb +19 -21
  9. data/lib/log_sense/apache_log_line_parser.rb +10 -10
  10. data/lib/log_sense/apache_log_parser.rb +44 -36
  11. data/lib/log_sense/emitter.rb +518 -25
  12. data/lib/log_sense/ip_locator.rb +26 -19
  13. data/lib/log_sense/options_parser.rb +37 -27
  14. data/lib/log_sense/rails_data_cruncher.rb +7 -3
  15. data/lib/log_sense/rails_log_parser.rb +108 -100
  16. data/lib/log_sense/templates/_command_invocation.html.erb +2 -2
  17. data/lib/log_sense/templates/_command_invocation.txt.erb +5 -3
  18. data/lib/log_sense/templates/_navigation.html.erb +22 -0
  19. data/lib/log_sense/templates/_output_table.html.erb +1 -7
  20. data/lib/log_sense/templates/_output_table.txt.erb +14 -0
  21. data/lib/log_sense/templates/_performance.html.erb +1 -1
  22. data/lib/log_sense/templates/_performance.txt.erb +8 -5
  23. data/lib/log_sense/templates/_report_data.html.erb +2 -3
  24. data/lib/log_sense/templates/_stylesheet.css +144 -0
  25. data/lib/log_sense/templates/_summary.html.erb +2 -2
  26. data/lib/log_sense/templates/_summary.txt.erb +11 -8
  27. data/lib/log_sense/templates/_warning.txt.erb +1 -0
  28. data/lib/log_sense/templates/apache.html.erb +51 -527
  29. data/lib/log_sense/templates/apache.txt.erb +22 -0
  30. data/lib/log_sense/templates/rails.html.erb +56 -353
  31. data/lib/log_sense/templates/rails.txt.erb +10 -60
  32. data/lib/log_sense/version.rb +1 -1
  33. metadata +7 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 266e20972553f6d409814398dab832334a4c01bfa12e90c74acfdc75ee0b7c8d
4
- data.tar.gz: 228e6bdc2d931e5190d82fc5ba66660ff6f0de0277a876746154de81a1ffe4e2
3
+ metadata.gz: 1f20a0d2041df1f2414bd0015fff27764c28a89dfd2d45fdb275141638bc5784
4
+ data.tar.gz: 14426b7383fe9b0077c2852f177545384a369f35489aa5d95372c20cbe19732d
5
5
  SHA512:
6
- metadata.gz: f1454d78cfec258ff3bc69359be29178ebab4cf7ffd2869d736c29f2cffd6efe209f65be59298864bf94de30bd022a3397c91446b043c49e704b6d38ced59357
7
- data.tar.gz: aa7239af4bb17270a23d9931194859b01a8ff50ebb9cc3c3ed37aeac1702aef413051b0381c57208dcffedd31ccbff5bd0a9485c0fe18947f540cda9f4463acd
6
+ metadata.gz: 9defcff35a5b3802d7d1a7db596a30aa01e28d3b9d784619de5f61713a587e427ccce76fcfcd478e946a0822af8d0822b23bad6392c2a71690ccd07250d5cfb7
7
+ data.tar.gz: 8b9143feb4f7508de9b7f60eddacafc4713c064f8bda21ed9ae92de364aa5e94fed36ff6d12f2b2aa108337d92c25868bce21d172397f48ff3c41ed93dc9a2b5
data/CHANGELOG.org CHANGED
@@ -2,6 +2,40 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.5.1
6
+
7
+ - [User] Option --input-files allows to specify input files
8
+ in addition to passing filenames to the command line
9
+ - [User] Minor changes to the layout of HTML reports
10
+ - [User] Add version number in reports
11
+ - [Fixed] Duplicated entries in navigation
12
+ - [Code] Updated and added minitest(s)
13
+
14
+ * 1.5.0
15
+
16
+ - [User] Present Unique Visits / day as integer
17
+ - [User] Added Country and Streaks report for rails
18
+ - [User] Changed Streak report in Apache
19
+ - [Gem] Updated DBIP
20
+ - [Gem] Updated Bundle
21
+ - [Code] Refactored all reports, so that they are specified
22
+ in the same way
23
+ - [Code] Refactor warning message in textual reports
24
+ - [Code] Build HTML menu for report specification
25
+ - [Code] Various refactoring passes on the code
26
+
27
+ * 1.4.1
28
+
29
+ - [User] New textual report for Apache
30
+ - [User] New option -w sets maximum width of URL, Path, and
31
+ Description columns in textual reports
32
+ - [User] Removed option -i, since input filenames are now taken
33
+ as direct arguments
34
+ - [User] Allow multiple files in input
35
+ - [Fixed] Complain if input format is not supported
36
+ - [Code] Refactoring of reports to manage better output to
37
+ multiple formats
38
+
5
39
  * 1.4.0
6
40
 
7
41
  - [User] The Apache Log report now organizes page requests in four
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- log_sense (1.3.1)
4
+ log_sense (1.4.2)
5
5
  browser
6
6
  ipaddr
7
7
  iso_country_codes
@@ -13,9 +13,9 @@ GEM
13
13
  specs:
14
14
  browser (5.3.1)
15
15
  byebug (11.1.3)
16
- ipaddr (1.2.3)
16
+ ipaddr (1.2.4)
17
17
  iso_country_codes (0.7.8)
18
- minitest (5.14.4)
18
+ minitest (5.15.0)
19
19
  rake (12.3.3)
20
20
  sqlite3 (1.4.2)
21
21
  terminal-table (3.0.2)
@@ -32,4 +32,4 @@ DEPENDENCIES
32
32
  rake (~> 12.0)
33
33
 
34
34
  BUNDLED WITH
35
- 2.2.32
35
+ 2.3.3
data/README.org CHANGED
@@ -19,8 +19,6 @@ LogSense reports the following data:
19
19
  - OS, browsers, and devices
20
20
  - IP Country location, thanks to the DPIP lite country DB
21
21
  - Streaks: resources accessed by a given IP over time
22
- - Potential attacks: access to resources which are not meant to be
23
- served by a web server serving static websites
24
22
  - Performance of Rails requests
25
23
 
26
24
  Filters from the command line allow to analyze specific periods and
@@ -33,6 +31,18 @@ And, of course, the compulsory screenshot:
33
31
  #+ATTR_HTML: :width 80%
34
32
  [[file:./apache-screenshot.png]]
35
33
 
34
+
35
+ * An important word of warning
36
+
37
+ [[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with unvalidated
38
+ user input to forge log entries or inject malicious content into the logs.
39
+
40
+ log_sense sanitizes entries of HTML reports, to try and protect from log
41
+ poisoning. *Log entries and URLs in SQLite3, however, are not sanitized*:
42
+ they are stored and read from the log. This is not, in general, an issue,
43
+ unless you use the data from SQLite in environments in which URLs can be
44
+ opened or code executed.
45
+
36
46
  * Motivation
37
47
 
38
48
  LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
@@ -54,6 +64,7 @@ generated files are then made available on a private area on the web.
54
64
  gem install log_sense
55
65
  #+end_src
56
66
 
67
+
57
68
  * Usage
58
69
 
59
70
  #+begin_src bash :results raw output :wrap example
@@ -62,21 +73,23 @@ generated files are then made available on a private area on the web.
62
73
 
63
74
  #+RESULTS:
64
75
  #+begin_example
65
- Usage: log_sense [options] [logfile]
76
+ Usage: log_sense [options] [logfile ...]
66
77
  --title=TITLE Title to use in the report
67
78
  -f, --input-format=FORMAT Input format (either rails or apache)
68
- -i, --input-file=INPUT_FILE Input file
79
+ -i, --input-files=file,file, Input files (can also be passed directly)
69
80
  -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
70
81
  -o, --output-file=OUTPUT_FILE Output file
71
82
  -b, --begin=DATE Consider entries after or on DATE
72
83
  -e, --end=DATE Consider entries before or on DATE
73
- -l, --limit=N Number of entries to show (defaults to 30)
84
+ -l, --limit=N Limit to the N most requested resources (defaults to 900)
85
+ -w, --width=WIDTH Maximum width of URL and description columns in text reports
74
86
  -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
75
87
  -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
88
+ --verbose Inform about progress (prints to STDERR)
76
89
  -v, --version Prints version information
77
90
  -h, --help Prints this help
78
91
 
79
- This is version 1.3.1
92
+ This is version 1.5.1
80
93
 
81
94
  Output formats
82
95
  rails parsing can produce the following outputs:
@@ -85,6 +98,7 @@ generated files are then made available on a private area on the web.
85
98
  - html
86
99
  apache parsing can produce the following outputs:
87
100
  - sqlite
101
+ - txt
88
102
  - html
89
103
  #+end_example
90
104
 
@@ -95,6 +109,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt
95
109
  log_sense -f rails -i production.log -t html -o performance.txt
96
110
  #+end_example
97
111
 
112
+
98
113
  * Change Log
99
114
 
100
115
  See the [[file:CHANGELOG.org][CHANGELOG]] file.
@@ -109,8 +124,8 @@ Concerning the outputs:
109
124
  - HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][Data Tables]], and [[https://vega.github.io/vega-lite/][Vega Light]], which
110
125
  are all downloaded from a CDN
111
126
  - The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
112
- processed to any format [[https://orgmode.org/][Org Mode]] can be exported to (including HTML
113
- and PDF)
127
+ processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
128
+ and PDF, with the word of warning in the section above.
114
129
 
115
130
  * Author and Contributors
116
131
 
@@ -118,8 +133,8 @@ Concerning the outputs:
118
133
 
119
134
  * Known Bugs
120
135
 
121
- No known bugs; an unknown number of unknown bugs.
122
- (See the open issues for the known bugs.)
136
+ No known bugs; an unknown number of unknown bugs. (See the open issues for
137
+ the known bugs.)
123
138
 
124
139
  * License
125
140
 
data/Rakefile CHANGED
@@ -9,7 +9,21 @@ end
9
9
  require_relative './lib/log_sense/ip_locator.rb'
10
10
 
11
11
  desc "Convert Geolocation DB to sqlite"
12
- task :dbip_to_sqlite3, [:filename] do |tasks, args|
13
- filename = args[:filename]
14
- ApacheLogReport::IpLocator::dbip_to_sqlite filename
12
+ task :dbip_to_sqlite3, [:year_month] do |tasks, args|
13
+ filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
14
+
15
+ if !File.exist? filename
16
+ puts "Error. Could not find: #{filename}"
17
+ puts
18
+ puts 'I see the following files:'
19
+ puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
20
+ puts ''
21
+ puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
22
+ puts '2. Save downloaded file to ip_locations/'
23
+ puts '3. Relaunch with YYYY-MM'
24
+
25
+ exit
26
+ else
27
+ LogSense::IpLocator::dbip_to_sqlite filename
28
+ end
15
29
  end
data/exe/log_sense CHANGED
@@ -7,21 +7,22 @@ require 'log_sense.rb'
7
7
  #
8
8
 
9
9
  # this better be here... OptionsParser consumes ARGV
10
- @command_line = ARGV.join(" ")
11
-
10
+ @command_line = ARGV.join(' ')
12
11
  @options = LogSense::OptionsParser.parse ARGV
13
- @input_file = @options[:input_file] || ARGV[0]
14
12
  @output_file = @options[:output_file]
15
13
 
16
- if not @input_file
17
- puts "Error: no input file specified."
18
- exit
19
- end
14
+ #
15
+ # Input files can be gotten from an option and from what remains in
16
+ # ARGV
17
+ #
18
+ @input_filenames = @options[:input_filenames] + ARGV
19
+ @non_existing = @input_filenames.reject { |x| File.exist?(x) }
20
20
 
21
- if not File.exist? @input_file
22
- puts "Error: input file '#{@input_file}' does not exist"
21
+ unless @non_existing.empty?
22
+ $stderr.puts "Error: input file(s) '#{@non_existing.join(', ')}' do not exist"
23
23
  exit 1
24
24
  end
25
+ @input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
25
26
 
26
27
  #
27
28
  # Parse Log and Track Statistics
@@ -36,32 +37,46 @@ when 'apache'
36
37
  when 'rails'
37
38
  parser_klass = LogSense::RailsLogParser
38
39
  cruncher_klass = LogSense::RailsDataCruncher
40
+ else
41
+ $stderr.puts "Error: input format #{@options[:input_format]} not understood."
42
+ exit 1
39
43
  end
40
44
 
41
- @db = parser_klass.parse @input_file
45
+ $stderr.puts "Parsing input files..." if @options[:verbose]
46
+ @db = parser_klass.parse @input_files
42
47
 
43
- if @options[:output_format] == "sqlite"
44
- ddb = SQLite3::Database.new(@output_file || "db.sqlite3")
48
+ if @options[:output_format] == 'sqlite'
49
+ $stderr.puts "Saving to SQLite3..." if @options[:verbose]
50
+ ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
45
51
  b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
46
52
  b.step(-1) #=> DONE
47
53
  b.finish
48
54
  else
55
+ $stderr.puts "Aggregating data..." if @options[:verbose]
49
56
  @data = cruncher_klass.crunch @db, @options
57
+
58
+ $stderr.puts "Geolocating..." if @options[:verbose]
50
59
  @data = LogSense::IpLocator.geolocate @data
51
60
 
61
+ $stderr.puts "Grouping by country..." if @options[:verbose]
62
+ country_col = @data[:ips][0].size - 1
63
+ @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
64
+
52
65
  @ended_at = Time.now
53
66
  @duration = @ended_at - @started_at
54
67
 
55
68
  @data = @data.merge({
56
69
  command: @command_line,
57
- log_file: @input_file,
70
+ filenames: ARGV,
71
+ log_files: @input_files,
58
72
  started_at: @started_at,
59
73
  ended_at: @ended_at,
60
- duration: @duration
74
+ duration: @duration,
75
+ width: @options[:width]
61
76
  })
62
-
63
77
  #
64
78
  # Emit Output
65
79
  #
80
+ $stderr.puts "Emitting..." if @options[:verbose]
66
81
  puts LogSense::Emitter.emit @data, @options
67
82
  end
Binary file
@@ -15,17 +15,17 @@ module LogSense
15
15
  @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
16
16
 
17
17
  @total_days = 0
18
- if @first_day and @last_day
19
- @total_days = (@last_day - @first_day).to_i
20
- end
18
+ @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
19
+
20
+ @source_files = db.execute 'SELECT distinct(source_file) from LogLine'
21
21
 
22
- @log_size = db.execute "SELECT count(datetime) from LogLine"
22
+ @log_size = db.execute 'SELECT count(datetime) from LogLine'
23
23
  @log_size = @log_size[0][0]
24
24
 
25
25
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
26
26
  @selfpolls_size = @selfpolls_size[0][0]
27
27
 
28
- @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
28
+ @crawlers_size = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
29
29
  @crawlers_size = @crawlers_size[0][0]
30
30
 
31
31
  @first_day_requested = options[:from_date]
@@ -35,7 +35,7 @@ module LogSense
35
35
  @last_day_in_analysis = date_intersect options[:to_date], @last_day, :min
36
36
 
37
37
  @total_days_in_analysis = 0
38
- if @first_day_in_analysis and @last_day_in_analysis
38
+ if @first_day_in_analysis && @last_day_in_analysis
39
39
  @total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
40
40
  end
41
41
 
@@ -45,24 +45,24 @@ module LogSense
45
45
  filter = [
46
46
  (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
47
47
  (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
48
- (options[:only_crawlers] ? "bot == 1" : nil),
49
- (options[:ignore_crawlers] ? "bot == 0" : nil),
48
+ (options[:only_crawlers] ? 'bot == 1' : nil),
49
+ (options[:ignore_crawlers] ? 'bot == 0' : nil),
50
50
  (options[:no_selfpolls] ? "ip != '::1'" : nil),
51
- "true"
51
+ 'true'
52
52
  ].compact.join " and "
53
53
 
54
54
  mega = 1024 * 1024
55
55
  giga = mega * 1024
56
56
  tera = giga * 1024
57
-
57
+
58
58
  # in alternative to sum(size)
59
59
  human_readable_size = <<-EOS
60
- CASE
60
+ CASE
61
61
  WHEN sum(size) < 1024 THEN sum(size) || ' B'
62
62
  WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
63
63
  WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
64
64
  WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
65
- WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
65
+ WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
66
66
  END AS size
67
67
  EOS
68
68
 
@@ -117,20 +117,19 @@ module LogSense
117
117
 
118
118
  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
119
119
 
120
- @streaks = db.execute "SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime"
120
+ @streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
121
121
  data = {}
122
122
 
123
- self.instance_variables.each do |variable|
124
- var_as_symbol = variable.to_s[1..-1].to_sym
125
- data[var_as_symbol] = eval(variable.to_s)
123
+ instance_variables.each do |variable|
124
+ var_as_symbol = variable.to_s[1..].to_sym
125
+ data[var_as_symbol] = instance_variable_get(variable)
126
126
  end
127
+
127
128
  data
128
129
  end
129
130
 
130
- private
131
-
132
- def self.date_intersect date1, date2, method
133
- if date1 and date2
131
+ def self.date_intersect(date1, date2, method)
132
+ if date1 && date2
134
133
  [date1, date2].send(method)
135
134
  elsif date1
136
135
  date1
@@ -140,4 +139,3 @@ module LogSense
140
139
  end
141
140
  end
142
141
  end
143
-
@@ -31,20 +31,20 @@ module LogSense
31
31
 
32
32
  TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
33
33
 
34
- HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
- WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
- METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
- URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^"]*)/
41
- RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
- SIZE=/(?<size>[0-9]+|-)/
34
+ HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
+ WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
+ OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
+ METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
+ PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
+ URL = /(?<url>[^ ]+)/
40
+ REFERER = /(?<referer>[^"]*)/
41
+ RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
42
+ SIZE = /(?<size>[0-9]+|-)/
43
43
  USER_AGENT = /(?<user_agent>[^"]*)/
44
44
 
45
45
  attr_reader :format
46
46
 
47
- def initialize
47
+ def initialize
48
48
  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
49
49
  end
50
50
 
@@ -7,10 +7,9 @@ module LogSense
7
7
  # parse an Apache log file and return a SQLite3 DB
8
8
  #
9
9
 
10
- def self.parse filename, options = {}
11
- content = filename ? File.readlines(filename) : ARGF.readlines
10
+ def self.parse(streams, options = {})
11
+ db = SQLite3::Database.new ':memory:'
12
12
 
13
- db = SQLite3::Database.new ":memory:"
14
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
15
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
16
15
  datetime TEXT,
@@ -28,15 +27,18 @@ module LogSense
28
27
  browser TEXT,
29
28
  browser_version TEXT,
30
29
  platform TEXT,
31
- platform_version TEXT)"
30
+ platform_version TEXT,
31
+ source_file TEXT,
32
+ line_number INTEGER
33
+ )"
32
34
 
33
- ins = db.prepare('insert into LogLine (
34
- datetime,
35
+ ins = db.prepare("insert into LogLine (
36
+ datetime,
35
37
  ip,
36
38
  user,
37
39
  unique_visitor,
38
40
  method,
39
- path,
41
+ path,
40
42
  extension,
41
43
  status,
42
44
  size,
@@ -46,44 +48,50 @@ module LogSense
46
48
  browser,
47
49
  browser_version,
48
50
  platform,
49
- platform_version)
50
- values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
51
+ platform_version,
52
+ source_file,
53
+ line_number
54
+ )
55
+ values (#{Array.new(18, '?').join(', ')})")
51
56
 
52
57
  parser = ApacheLogLineParser.new
53
-
54
- content.each do |line|
55
- begin
56
- hash = parser.parse line
57
- ua = Browser.new(hash[:user_agent], accept_language: "en-us")
58
- ins.execute(
59
- DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
60
- hash[:ip],
61
- hash[:userid],
62
- unique_visitor_id(hash),
63
- hash[:method],
64
- hash[:url],
65
- (hash[:url] ? File.extname(hash[:url]) : ""),
66
- hash[:status],
67
- hash[:size].to_i,
68
- hash[:referer],
69
- hash[:user_agent],
70
- ua.bot? ? 1 : 0,
71
- (ua.name || ""),
72
- (ua.version || ""),
73
- (ua.platform.name || ""),
74
- (ua.platform.version || "")
75
- )
76
- rescue StandardError => e
77
- STDERR.puts e.message
58
+
59
+ streams.each do |stream|
60
+ stream.readlines.each_with_index do |line, line_number|
61
+ begin
62
+ hash = parser.parse line
63
+ ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
64
+ ins.execute(
65
+ DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
66
+ hash[:ip],
67
+ hash[:userid],
68
+ unique_visitor_id(hash),
69
+ hash[:method],
70
+ hash[:url],
71
+ (hash[:url] ? File.extname(hash[:url]) : ''),
72
+ hash[:status],
73
+ hash[:size].to_i,
74
+ hash[:referer],
75
+ hash[:user_agent],
76
+ ua.bot? ? 1 : 0,
77
+ (ua.name || ''),
78
+ (ua.version || ''),
79
+ (ua.platform.name || ''),
80
+ (ua.platform.version || ''),
81
+ stream == $stdin ? "stdin" : stream.path,
82
+ line_number
83
+ )
84
+ rescue StandardError => e
85
+ $stderr.puts e.message
86
+ end
78
87
  end
79
88
  end
80
-
89
+
81
90
  db
82
91
  end
83
92
 
84
93
  def self.unique_visitor_id hash
85
94
  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
86
95
  end
87
-
88
96
  end
89
97
  end