log_sense 1.4.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.org +34 -0
  3. data/Gemfile.lock +4 -4
  4. data/README.org +25 -10
  5. data/Rakefile +17 -3
  6. data/exe/log_sense +30 -15
  7. data/ip_locations/dbip-country-lite.sqlite3 +0 -0
  8. data/lib/log_sense/apache_data_cruncher.rb +19 -21
  9. data/lib/log_sense/apache_log_line_parser.rb +10 -10
  10. data/lib/log_sense/apache_log_parser.rb +44 -36
  11. data/lib/log_sense/emitter.rb +518 -25
  12. data/lib/log_sense/ip_locator.rb +26 -19
  13. data/lib/log_sense/options_parser.rb +37 -27
  14. data/lib/log_sense/rails_data_cruncher.rb +7 -3
  15. data/lib/log_sense/rails_log_parser.rb +108 -100
  16. data/lib/log_sense/templates/_command_invocation.html.erb +2 -2
  17. data/lib/log_sense/templates/_command_invocation.txt.erb +5 -3
  18. data/lib/log_sense/templates/_navigation.html.erb +22 -0
  19. data/lib/log_sense/templates/_output_table.html.erb +1 -7
  20. data/lib/log_sense/templates/_output_table.txt.erb +14 -0
  21. data/lib/log_sense/templates/_performance.html.erb +1 -1
  22. data/lib/log_sense/templates/_performance.txt.erb +8 -5
  23. data/lib/log_sense/templates/_report_data.html.erb +2 -3
  24. data/lib/log_sense/templates/_stylesheet.css +144 -0
  25. data/lib/log_sense/templates/_summary.html.erb +2 -2
  26. data/lib/log_sense/templates/_summary.txt.erb +11 -8
  27. data/lib/log_sense/templates/_warning.txt.erb +1 -0
  28. data/lib/log_sense/templates/apache.html.erb +51 -527
  29. data/lib/log_sense/templates/apache.txt.erb +22 -0
  30. data/lib/log_sense/templates/rails.html.erb +56 -353
  31. data/lib/log_sense/templates/rails.txt.erb +10 -60
  32. data/lib/log_sense/version.rb +1 -1
  33. metadata +7 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 266e20972553f6d409814398dab832334a4c01bfa12e90c74acfdc75ee0b7c8d
4
- data.tar.gz: 228e6bdc2d931e5190d82fc5ba66660ff6f0de0277a876746154de81a1ffe4e2
3
+ metadata.gz: 1f20a0d2041df1f2414bd0015fff27764c28a89dfd2d45fdb275141638bc5784
4
+ data.tar.gz: 14426b7383fe9b0077c2852f177545384a369f35489aa5d95372c20cbe19732d
5
5
  SHA512:
6
- metadata.gz: f1454d78cfec258ff3bc69359be29178ebab4cf7ffd2869d736c29f2cffd6efe209f65be59298864bf94de30bd022a3397c91446b043c49e704b6d38ced59357
7
- data.tar.gz: aa7239af4bb17270a23d9931194859b01a8ff50ebb9cc3c3ed37aeac1702aef413051b0381c57208dcffedd31ccbff5bd0a9485c0fe18947f540cda9f4463acd
6
+ metadata.gz: 9defcff35a5b3802d7d1a7db596a30aa01e28d3b9d784619de5f61713a587e427ccce76fcfcd478e946a0822af8d0822b23bad6392c2a71690ccd07250d5cfb7
7
+ data.tar.gz: 8b9143feb4f7508de9b7f60eddacafc4713c064f8bda21ed9ae92de364aa5e94fed36ff6d12f2b2aa108337d92c25868bce21d172397f48ff3c41ed93dc9a2b5
data/CHANGELOG.org CHANGED
@@ -2,6 +2,40 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.5.1
6
+
7
+ - [User] Option --input-files allows specifying input files
8
+ in addition to passing filenames on the command line
9
+ - [User] Minor changes to the layout of HTML reports
10
+ - [User] Add version number in reports
11
+ - [Fixed] Duplicated entries in navigation
12
+ - [Code] Updated and added minitest(s)
13
+
14
+ * 1.5.0
15
+
16
+ - [User] Present Unique Visits / day as integer
17
+ - [User] Added Country and Streaks report for rails
18
+ - [User] Changed Streak report in Apache
19
+ - [Gem] Updated DBIP
20
+ - [Gem] Updated Bundle
21
+ - [Code] Refactored all reports, so that they are specified
22
+ in the same way
23
+ - [Code] Refactor warning message in textual reports
24
+ - [Code] Build HTML menu for report specification
25
+ - [Code] Various refactoring passes on the code
26
+
27
+ * 1.4.1
28
+
29
+ - [User] New textual report for Apache
30
+ - [User] New option -w sets maximum width of URL, Path, and
31
+ Description columns in textual reports
32
+ - [User] Removed option -i, since input filenames are now taken
33
+ as direct arguments
34
+ - [User] Allow multiple files in input
35
+ - [Fixed] Complain if input format is not supported
36
+ - [Code] Refactoring of reports to better manage output to
37
+ multiple formats
38
+
5
39
  * 1.4.0
6
40
 
7
41
  - [User] The Apache Log report now organizes page requests in four
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- log_sense (1.3.1)
4
+ log_sense (1.4.2)
5
5
  browser
6
6
  ipaddr
7
7
  iso_country_codes
@@ -13,9 +13,9 @@ GEM
13
13
  specs:
14
14
  browser (5.3.1)
15
15
  byebug (11.1.3)
16
- ipaddr (1.2.3)
16
+ ipaddr (1.2.4)
17
17
  iso_country_codes (0.7.8)
18
- minitest (5.14.4)
18
+ minitest (5.15.0)
19
19
  rake (12.3.3)
20
20
  sqlite3 (1.4.2)
21
21
  terminal-table (3.0.2)
@@ -32,4 +32,4 @@ DEPENDENCIES
32
32
  rake (~> 12.0)
33
33
 
34
34
  BUNDLED WITH
35
- 2.2.32
35
+ 2.3.3
data/README.org CHANGED
@@ -19,8 +19,6 @@ LogSense reports the following data:
19
19
  - OS, browsers, and devices
20
20
  - IP Country location, thanks to the DBIP lite country DB
21
21
  - Streaks: resources accessed by a given IP over time
22
- - Potential attacks: access to resources which are not meant to be
23
- served by a web server serving static websites
24
22
  - Performance of Rails requests
25
23
 
26
24
  Filters from the command line allow analyzing specific periods and
@@ -33,6 +31,18 @@ And, of course, the compulsory screenshot:
33
31
  #+ATTR_HTML: :width 80%
34
32
  [[file:./apache-screenshot.png]]
35
33
 
34
+
35
+ * An important word of warning
36
+
37
+ [[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with unvalidated
38
+ user input to forge log entries or inject malicious content into the logs.
39
+
40
+ log_sense sanitizes entries of HTML reports, to try and protect from log
41
+ poisoning. *Log entries and URLs in SQLite3, however, are not sanitized*:
42
+ they are stored as read from the log. This is not, in general, an issue,
43
+ unless you use the data from SQLite in environments in which URLs can be
44
+ opened or code executed.
45
+
36
46
  * Motivation
37
47
 
38
48
  LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
@@ -54,6 +64,7 @@ generated files are then made available on a private area on the web.
54
64
  gem install log_sense
55
65
  #+end_src
56
66
 
67
+
57
68
  * Usage
58
69
 
59
70
  #+begin_src bash :results raw output :wrap example
@@ -62,21 +73,23 @@ generated files are then made available on a private area on the web.
62
73
 
63
74
  #+RESULTS:
64
75
  #+begin_example
65
- Usage: log_sense [options] [logfile]
76
+ Usage: log_sense [options] [logfile ...]
66
77
  --title=TITLE Title to use in the report
67
78
  -f, --input-format=FORMAT Input format (either rails or apache)
68
- -i, --input-file=INPUT_FILE Input file
79
+ -i, --input-files=file,file, Input files (can also be passed directly)
69
80
  -t, --output-format=FORMAT Output format: html, org, txt, sqlite. See below for available formats
70
81
  -o, --output-file=OUTPUT_FILE Output file
71
82
  -b, --begin=DATE Consider entries after or on DATE
72
83
  -e, --end=DATE Consider entries before or on DATE
73
- -l, --limit=N Number of entries to show (defaults to 30)
84
+ -l, --limit=N Limit to the N most requested resources (defaults to 900)
85
+ -w, --width=WIDTH Maximum width of URL and description columns in text reports
74
86
  -c, --crawlers=POLICY Decide what to do with crawlers (applies to Apache Logs)
75
87
  -n, --no-selfpolls Ignore self poll entries (requests from ::1; applies to Apache Logs)
88
+ --verbose Inform about progress (prints to STDERR)
76
89
  -v, --version Prints version information
77
90
  -h, --help Prints this help
78
91
 
79
- This is version 1.3.1
92
+ This is version 1.5.1
80
93
 
81
94
  Output formats
82
95
  rails parsing can produce the following outputs:
@@ -85,6 +98,7 @@ generated files are then made available on a private area on the web.
85
98
  - html
86
99
  apache parsing can produce the following outputs:
87
100
  - sqlite
101
+ - txt
88
102
  - html
89
103
  #+end_example
90
104
 
@@ -95,6 +109,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt
95
109
  log_sense -f rails -i production.log -t html -o performance.txt
96
110
  #+end_example
97
111
 
112
+
98
113
  * Change Log
99
114
 
100
115
  See the [[file:CHANGELOG.org][CHANGELOG]] file.
@@ -109,8 +124,8 @@ Concerning the outputs:
109
124
  - HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][DataTables]], and [[https://vega.github.io/vega-lite/][Vega-Lite]], which
110
125
  are all downloaded from a CDN
111
126
  - The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
112
- processed to any format [[https://orgmode.org/][Org Mode]] can be exported to (including HTML
113
- and PDF)
127
+ processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
128
+ and PDF, with the word of warning in the section above.
114
129
 
115
130
  * Author and Contributors
116
131
 
@@ -118,8 +133,8 @@ Concerning the outputs:
118
133
 
119
134
  * Known Bugs
120
135
 
121
- No known bugs; an unknown number of unknown bugs.
122
- (See the open issues for the known bugs.)
136
+ No known bugs; an unknown number of unknown bugs. (See the open issues for
137
+ the known bugs.)
123
138
 
124
139
  * License
125
140
 
data/Rakefile CHANGED
@@ -9,7 +9,21 @@ end
9
9
  require_relative './lib/log_sense/ip_locator.rb'
10
10
 
11
11
  desc "Convert Geolocation DB to sqlite"
12
- task :dbip_to_sqlite3, [:filename] do |tasks, args|
13
- filename = args[:filename]
14
- ApacheLogReport::IpLocator::dbip_to_sqlite filename
12
+ task :dbip_to_sqlite3, [:year_month] do |tasks, args|
13
+ filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
14
+
15
+ if !File.exist? filename
16
+ puts "Error. Could not find: #{filename}"
17
+ puts
18
+ puts 'I see the following files:'
19
+ puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
20
+ puts ''
21
+ puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
22
+ puts '2. Save downloaded file to ip_locations/'
23
+ puts '3. Relaunch with YYYY-MM'
24
+
25
+ exit
26
+ else
27
+ LogSense::IpLocator::dbip_to_sqlite filename
28
+ end
15
29
  end
data/exe/log_sense CHANGED
@@ -7,21 +7,22 @@ require 'log_sense.rb'
7
7
  #
8
8
 
9
9
  # this better be here... OptionsParser consumes ARGV
10
- @command_line = ARGV.join(" ")
11
-
10
+ @command_line = ARGV.join(' ')
12
11
  @options = LogSense::OptionsParser.parse ARGV
13
- @input_file = @options[:input_file] || ARGV[0]
14
12
  @output_file = @options[:output_file]
15
13
 
16
- if not @input_file
17
- puts "Error: no input file specified."
18
- exit
19
- end
14
+ #
15
+ # Input files can be gotten from an option and from what remains in
16
+ # ARGV
17
+ #
18
+ @input_filenames = @options[:input_filenames] + ARGV
19
+ @non_existing = @input_filenames.reject { |x| File.exist?(x) }
20
20
 
21
- if not File.exist? @input_file
22
- puts "Error: input file '#{@input_file}' does not exist"
21
+ unless @non_existing.empty?
22
+ $stderr.puts "Error: input file(s) '#{@non_existing.join(', ')}' do not exist"
23
23
  exit 1
24
24
  end
25
+ @input_files = @input_filenames.empty? ? [$stdin] : @input_filenames.map { |x| File.open(x, 'r') }
25
26
 
26
27
  #
27
28
  # Parse Log and Track Statistics
@@ -36,32 +37,46 @@ when 'apache'
36
37
  when 'rails'
37
38
  parser_klass = LogSense::RailsLogParser
38
39
  cruncher_klass = LogSense::RailsDataCruncher
40
+ else
41
+ $stderr.puts "Error: input format #{@options[:input_format]} not understood."
42
+ exit 1
39
43
  end
40
44
 
41
- @db = parser_klass.parse @input_file
45
+ $stderr.puts "Parsing input files..." if @options[:verbose]
46
+ @db = parser_klass.parse @input_files
42
47
 
43
- if @options[:output_format] == "sqlite"
44
- ddb = SQLite3::Database.new(@output_file || "db.sqlite3")
48
+ if @options[:output_format] == 'sqlite'
49
+ $stderr.puts "Saving to SQLite3..." if @options[:verbose]
50
+ ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
45
51
  b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
46
52
  b.step(-1) #=> DONE
47
53
  b.finish
48
54
  else
55
+ $stderr.puts "Aggregating data..." if @options[:verbose]
49
56
  @data = cruncher_klass.crunch @db, @options
57
+
58
+ $stderr.puts "Geolocating..." if @options[:verbose]
50
59
  @data = LogSense::IpLocator.geolocate @data
51
60
 
61
+ $stderr.puts "Grouping by country..." if @options[:verbose]
62
+ country_col = @data[:ips][0].size - 1
63
+ @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
64
+
52
65
  @ended_at = Time.now
53
66
  @duration = @ended_at - @started_at
54
67
 
55
68
  @data = @data.merge({
56
69
  command: @command_line,
57
- log_file: @input_file,
70
+ filenames: ARGV,
71
+ log_files: @input_files,
58
72
  started_at: @started_at,
59
73
  ended_at: @ended_at,
60
- duration: @duration
74
+ duration: @duration,
75
+ width: @options[:width]
61
76
  })
62
-
63
77
  #
64
78
  # Emit Output
65
79
  #
80
+ $stderr.puts "Emitting..." if @options[:verbose]
66
81
  puts LogSense::Emitter.emit @data, @options
67
82
  end
Binary file
@@ -15,17 +15,17 @@ module LogSense
15
15
  @last_day = last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
16
16
 
17
17
  @total_days = 0
18
- if @first_day and @last_day
19
- @total_days = (@last_day - @first_day).to_i
20
- end
18
+ @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
19
+
20
+ @source_files = db.execute 'SELECT distinct(source_file) from LogLine'
21
21
 
22
- @log_size = db.execute "SELECT count(datetime) from LogLine"
22
+ @log_size = db.execute 'SELECT count(datetime) from LogLine'
23
23
  @log_size = @log_size[0][0]
24
24
 
25
25
  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
26
26
  @selfpolls_size = @selfpolls_size[0][0]
27
27
 
28
- @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
28
+ @crawlers_size = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
29
29
  @crawlers_size = @crawlers_size[0][0]
30
30
 
31
31
  @first_day_requested = options[:from_date]
@@ -35,7 +35,7 @@ module LogSense
35
35
  @last_day_in_analysis = date_intersect options[:to_date], @last_day, :min
36
36
 
37
37
  @total_days_in_analysis = 0
38
- if @first_day_in_analysis and @last_day_in_analysis
38
+ if @first_day_in_analysis && @last_day_in_analysis
39
39
  @total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
40
40
  end
41
41
 
@@ -45,24 +45,24 @@ module LogSense
45
45
  filter = [
46
46
  (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
47
47
  (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
48
- (options[:only_crawlers] ? "bot == 1" : nil),
49
- (options[:ignore_crawlers] ? "bot == 0" : nil),
48
+ (options[:only_crawlers] ? 'bot == 1' : nil),
49
+ (options[:ignore_crawlers] ? 'bot == 0' : nil),
50
50
  (options[:no_selfpolls] ? "ip != '::1'" : nil),
51
- "true"
51
+ 'true'
52
52
  ].compact.join " and "
53
53
 
54
54
  mega = 1024 * 1024
55
55
  giga = mega * 1024
56
56
  tera = giga * 1024
57
-
57
+
58
58
  # in alternative to sum(size)
59
59
  human_readable_size = <<-EOS
60
- CASE
60
+ CASE
61
61
  WHEN sum(size) < 1024 THEN sum(size) || ' B'
62
62
  WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
63
63
  WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
64
64
  WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
65
- WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
65
+ WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
66
66
  END AS size
67
67
  EOS
68
68
 
@@ -117,20 +117,19 @@ module LogSense
117
117
 
118
118
  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
119
119
 
120
- @streaks = db.execute "SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime"
120
+ @streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
121
121
  data = {}
122
122
 
123
- self.instance_variables.each do |variable|
124
- var_as_symbol = variable.to_s[1..-1].to_sym
125
- data[var_as_symbol] = eval(variable.to_s)
123
+ instance_variables.each do |variable|
124
+ var_as_symbol = variable.to_s[1..].to_sym
125
+ data[var_as_symbol] = instance_variable_get(variable)
126
126
  end
127
+
127
128
  data
128
129
  end
129
130
 
130
- private
131
-
132
- def self.date_intersect date1, date2, method
133
- if date1 and date2
131
+ def self.date_intersect(date1, date2, method)
132
+ if date1 && date2
134
133
  [date1, date2].send(method)
135
134
  elsif date1
136
135
  date1
@@ -140,4 +139,3 @@ module LogSense
140
139
  end
141
140
  end
142
141
  end
143
-
@@ -31,20 +31,20 @@ module LogSense
31
31
 
32
32
  TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
33
33
 
34
- HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
- WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
- METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
- URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^"]*)/
41
- RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
- SIZE=/(?<size>[0-9]+|-)/
34
+ HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
+ WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
+ OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
+ METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
+ PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
+ URL = /(?<url>[^ ]+)/
40
+ REFERER = /(?<referer>[^"]*)/
41
+ RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
42
+ SIZE = /(?<size>[0-9]+|-)/
43
43
  USER_AGENT = /(?<user_agent>[^"]*)/
44
44
 
45
45
  attr_reader :format
46
46
 
47
- def initialize
47
+ def initialize
48
48
  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
49
49
  end
50
50
 
@@ -7,10 +7,9 @@ module LogSense
7
7
  # parse an Apache log file and return a SQLite3 DB
8
8
  #
9
9
 
10
- def self.parse filename, options = {}
11
- content = filename ? File.readlines(filename) : ARGF.readlines
10
+ def self.parse(streams, options = {})
11
+ db = SQLite3::Database.new ':memory:'
12
12
 
13
- db = SQLite3::Database.new ":memory:"
14
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
15
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
16
15
  datetime TEXT,
@@ -28,15 +27,18 @@ module LogSense
28
27
  browser TEXT,
29
28
  browser_version TEXT,
30
29
  platform TEXT,
31
- platform_version TEXT)"
30
+ platform_version TEXT,
31
+ source_file TEXT,
32
+ line_number INTEGER
33
+ )"
32
34
 
33
- ins = db.prepare('insert into LogLine (
34
- datetime,
35
+ ins = db.prepare("insert into LogLine (
36
+ datetime,
35
37
  ip,
36
38
  user,
37
39
  unique_visitor,
38
40
  method,
39
- path,
41
+ path,
40
42
  extension,
41
43
  status,
42
44
  size,
@@ -46,44 +48,50 @@ module LogSense
46
48
  browser,
47
49
  browser_version,
48
50
  platform,
49
- platform_version)
50
- values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')
51
+ platform_version,
52
+ source_file,
53
+ line_number
54
+ )
55
+ values (#{Array.new(18, '?').join(', ')})")
51
56
 
52
57
  parser = ApacheLogLineParser.new
53
-
54
- content.each do |line|
55
- begin
56
- hash = parser.parse line
57
- ua = Browser.new(hash[:user_agent], accept_language: "en-us")
58
- ins.execute(
59
- DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
60
- hash[:ip],
61
- hash[:userid],
62
- unique_visitor_id(hash),
63
- hash[:method],
64
- hash[:url],
65
- (hash[:url] ? File.extname(hash[:url]) : ""),
66
- hash[:status],
67
- hash[:size].to_i,
68
- hash[:referer],
69
- hash[:user_agent],
70
- ua.bot? ? 1 : 0,
71
- (ua.name || ""),
72
- (ua.version || ""),
73
- (ua.platform.name || ""),
74
- (ua.platform.version || "")
75
- )
76
- rescue StandardError => e
77
- STDERR.puts e.message
58
+
59
+ streams.each do |stream|
60
+ stream.readlines.each_with_index do |line, line_number|
61
+ begin
62
+ hash = parser.parse line
63
+ ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
64
+ ins.execute(
65
+ DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
66
+ hash[:ip],
67
+ hash[:userid],
68
+ unique_visitor_id(hash),
69
+ hash[:method],
70
+ hash[:url],
71
+ (hash[:url] ? File.extname(hash[:url]) : ''),
72
+ hash[:status],
73
+ hash[:size].to_i,
74
+ hash[:referer],
75
+ hash[:user_agent],
76
+ ua.bot? ? 1 : 0,
77
+ (ua.name || ''),
78
+ (ua.version || ''),
79
+ (ua.platform.name || ''),
80
+ (ua.platform.version || ''),
81
+ stream == $stdin ? "stdin" : stream.path,
82
+ line_number
83
+ )
84
+ rescue StandardError => e
85
+ $stderr.puts e.message
86
+ end
78
87
  end
79
88
  end
80
-
89
+
81
90
  db
82
91
  end
83
92
 
84
93
  def self.unique_visitor_id hash
85
94
  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
86
95
  end
87
-
88
96
  end
89
97
  end