log_sense 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0a915af429fe2492f17bc767c86767e38d37a674af6206fb3b2c0a76e4cad620
4
- data.tar.gz: bf4cc3a1e438b752c9c99fee5f91c85abd38de95dac947c2382e2f9825b51467
3
+ metadata.gz: 126257c949e11f090cc1928a1458572529f71c44a5c20baae35861241dfa7b7b
4
+ data.tar.gz: 33a1ee650598a90ca9adb6a6c2795746dfe8a7735414b22b930d3806b205b318
5
5
  SHA512:
6
- metadata.gz: 5612ef5474aa397132527588d289d9d1eba4f8954b92553e32fd5b856f2bd5441ba09d89d1bf12a0f220c602dcd2e73de2d6e360b6177b162d57adc2726442c9
7
- data.tar.gz: c3b546cc177a3364b1b513f4274c1521aa1669bb17b142eb453ba73251f1e3458e7b9d1703b692ef275a474821ce7375e387155f63e3e7d5d7c9c42e1c50a150
6
+ metadata.gz: f135c70480994434dea0b5ff11bac5b1239d071ae38afeda7dd3672a43e127bba06136d550f29844c60dd2d2640a87c57f7fda683240512d1fe37794d124a433
7
+ data.tar.gz: 65abbe86864aba7d9e6499e6f3ec4a0ef2facf00505ea8611eec3b44279772641bf3a4374045acac0892c8bde19ce37c74a88501fa7905342b1e30032f2b50b4
data/CHANGELOG.org CHANGED
@@ -2,6 +2,18 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.7.0
6
+
7
+ - [User] Fixes a bug with the geolocator
8
+ - [User] Fixes a bug causing a crash when no country was found by the geolocator
9
+ - [User] Fixes bugs related to corner cases (empty logs, wrong parser for log,
10
+ empty geolocation data)
11
+ - [User] Updated DB-IP country file to Jun 2024 version.
12
+ - [User] Refreshed the style a bit, removed Fira Sans and updated versions of
13
+ CSS and JS frameworks
14
+ - [Code] Move options and some code into their own dir
15
+ - [Code] Add rendering view parsing (useful in development; no views yet)
16
+
5
17
  * 1.6.1
6
18
 
7
19
  - Country DB now stores country name.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- log_sense (1.5.3)
4
+ log_sense (1.7.0)
5
5
  browser
6
6
  ipaddr
7
7
  iso_country_codes
@@ -12,24 +12,30 @@ GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
14
  browser (5.3.1)
15
- debug (1.6.2)
16
- irb (>= 1.3.6)
17
- reline (>= 0.3.1)
18
- io-console (0.5.11)
19
- ipaddr (1.2.5)
20
- irb (1.4.1)
21
- reline (>= 0.3.0)
15
+ debug (1.9.2)
16
+ irb (~> 1.10)
17
+ reline (>= 0.3.8)
18
+ io-console (0.7.2)
19
+ ipaddr (1.2.6)
20
+ irb (1.13.1)
21
+ rdoc (>= 4.0.0)
22
+ reline (>= 0.4.2)
22
23
  iso_country_codes (0.7.8)
23
- mini_portile2 (2.8.0)
24
- minitest (5.15.0)
24
+ mini_portile2 (2.8.7)
25
+ minitest (5.23.1)
26
+ psych (5.1.2)
27
+ stringio
25
28
  rake (12.3.3)
26
- reline (0.3.1)
29
+ rdoc (6.7.0)
30
+ psych (>= 4.0.0)
31
+ reline (0.5.8)
27
32
  io-console (~> 0.5)
28
- sqlite3 (1.5.4)
33
+ sqlite3 (2.0.2)
29
34
  mini_portile2 (~> 2.8.0)
35
+ stringio (3.1.0)
30
36
  terminal-table (3.0.2)
31
37
  unicode-display_width (>= 1.1.1, < 3)
32
- unicode-display_width (2.3.0)
38
+ unicode-display_width (2.5.0)
33
39
 
34
40
  PLATFORMS
35
41
  ruby
@@ -41,4 +47,4 @@ DEPENDENCIES
41
47
  rake (~> 12.0)
42
48
 
43
49
  BUNDLED WITH
44
- 2.3.3
50
+ 2.5.3
data/Rakefile CHANGED
@@ -8,9 +8,9 @@ end
8
8
 
9
9
  require_relative './lib/log_sense/ip_locator.rb'
10
10
 
11
- desc "Convert Geolocation DB to sqlite"
11
+ desc "Convert Geolocation DB to sqlite (arg YYYY_MM or filename)"
12
12
  task :dbip, [:filename] do |tasks, args|
13
- filename_or_yyyy_mm = args[:filename]
13
+ filename_or_yyyy_mm = args[:filename] || ""
14
14
 
15
15
  filename = if /\d{4}-\d{2}/.match(filename_or_yyyy_mm)
16
16
  "ip_locations/dbip-country-lite-#{filename_or_yyyy_mm}.csv"
@@ -18,13 +18,15 @@ task :dbip, [:filename] do |tasks, args|
18
18
  filename_or_yyyy_mm
19
19
  end
20
20
 
21
- # if the filename has a .gz extension or a gzipped version of the file
22
- # exists, gunzip it
21
+ # if the filename passed as argument has a .gz extension or a gzipped version
22
+ # of the file passed as argument exists, gunzip it
23
23
  if File.extname(filename) == ".gz" || File.exist?("#{filename}.gz")
24
24
  system "gunzip #{filename}.gz"
25
25
  end
26
26
 
27
- if !File.exist? filename
27
+ if File.exist? filename
28
+ LogSense::IpLocator::dbip_to_sqlite filename
29
+ else
28
30
  puts <<-EOS
29
31
  Error. Could not find: #{filename}
30
32
 
@@ -37,13 +39,10 @@ I see the following files:
37
39
  3. Relaunch with YYYY-MM (will build: dbip-country-lite-YYYY-MM.csv)
38
40
  or with filename.
39
41
 
40
- Remark. If the filename has the extension .gz or if the
41
- filename does not exist, but a file with the same name and .gz extension
42
- exists, it is gunzipped first
42
+ Remark. If the filename has the extension .gz or if the filename does not exist,
43
+ but a file with the same name and .gz extension exists, it is gunzipped first
43
44
  EOS
44
45
 
45
46
  exit
46
- else
47
- LogSense::IpLocator::dbip_to_sqlite filename
48
47
  end
49
48
  end
data/exe/log_sense CHANGED
@@ -9,7 +9,7 @@ require "sqlite3"
9
9
 
10
10
  # this better be here... OptionsParser consumes ARGV
11
11
  @command_line = ARGV.join(" ")
12
- @options = LogSense::OptionsParser.parse ARGV
12
+ @options = LogSense::Options::Parser.parse ARGV
13
13
  @input_filenames = @options[:input_filenames] + ARGV
14
14
  @output_filename = @options[:output_filename]
15
15
 
@@ -20,33 +20,36 @@ require "sqlite3"
20
20
  #
21
21
  # Check input files
22
22
  #
23
- @non_existing = @input_filenames.reject { |x| File.exist?(x) }
24
23
 
24
+ @non_existing = @input_filenames.reject { |x| File.exist?(x) }
25
25
  if @non_existing.any?
26
26
  warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
27
27
  exit 1
28
28
  end
29
29
 
30
- #
31
- # Special condition: sqlite3 requires a single file as input
32
- #
33
- if @input_filenames.size > 0 &&
34
- File.extname(@input_filenames.first) == "sqlite3" &&
35
- @input_filenames.size > 1
36
- warn "Error: you can pass only one sqlite3 file as input"
30
+ @sqlite3_files = @input_filenames.select { |x| File.extname(x).include?("sqlite") }
31
+ if @sqlite3_files.any? && @input_filenames.size != 1
32
+ warn "Error: when passing an SQLite3 DB, this has to be the only input file"
37
33
  exit 1
38
34
  end
39
35
 
36
+ #
37
+ # Check output files
38
+ #
39
+
40
+ # Nothing to be done here, since we output to STDOUT if no output filename is
41
+ # specified
42
+
40
43
  #
41
44
  # Supported input/output chains
42
45
  #
43
46
  iformat = @options[:input_format]
44
47
  oformat = @options[:output_format]
45
48
 
46
- if !LogSense::OptionsChecker::compatible?(iformat, oformat)
49
+ if !LogSense::Options::Checker.compatible?(iformat, oformat)
47
50
  warn "Error: don't know how to make #{iformat} into #{oformat}."
48
51
  warn "Possible transformation chains:"
49
- warn LogSense::OptionsChecker.chains_to_s
52
+ warn LogSense::Options::Checker.chains_to_s
50
53
  exit 1
51
54
  end
52
55
 
@@ -56,8 +59,11 @@ end
56
59
 
57
60
  @started_at = Time.now
58
61
 
59
- if @input_filenames.size > 0 &&
60
- File.extname(@input_filenames.first) == ".sqlite3"
62
+ #
63
+ # Input
64
+ #
65
+
66
+ if @input_filenames.size > 0 && File.extname(@input_filenames.first) == ".sqlite3"
61
67
  warn "Reading SQLite3 DB ..." if @options[:verbose]
62
68
  @db = SQLite3::Database.open @input_filenames.first
63
69
  else
@@ -67,12 +73,19 @@ else
67
73
  else
68
74
  @input_filenames.map { |fname| File.open(fname, "r") }
69
75
  end
70
- class_name = "LogSense::#{@options[:input_format].capitalize}LogParser"
76
+
77
+ class_name = "LogSense::#{@options[:input_format].capitalize}::LogParser"
71
78
  parser_class = Object.const_get class_name
72
79
  parser = parser_class.new
73
80
  @db = parser.parse @input_files
74
81
  end
75
82
 
83
+ #
84
+ # Output
85
+ #
86
+
87
+ # TODO this code could benefit from some classes abstracting the work a bit
88
+
76
89
  if @options[:output_format] == "sqlite3"
77
90
  warn "Saving SQLite3 DB ..." if @options[:verbose]
78
91
 
@@ -83,7 +96,7 @@ if @options[:output_format] == "sqlite3"
83
96
 
84
97
  exit 0
85
98
  elsif @options[:output_format] == "ufw"
86
- pattern = @options[:pattern] || "php"
99
+ pattern = @options[:pattern]
87
100
 
88
101
  if @options[:input_format] == "rails"
89
102
  query = "select distinct event.ip,event.url
@@ -113,14 +126,18 @@ else
113
126
  aggr = aggr_class.new(@db, @options)
114
127
  @data = aggr.aggregate
115
128
 
116
- if @options[:geolocation]
129
+ if @options[:geolocation] && @data[:ips].size != 0
117
130
  warn "Geolocating ..." if @options[:verbose]
118
- @data = LogSense::IpLocator.geolocate @data
131
+ geolocated_data = LogSense::IpLocator.geolocate @data
119
132
 
120
133
  warn "Grouping IPs by country ..." if @options[:verbose]
121
- country_col = @data[:ips][0].size - 1
122
- @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
134
+ country_col = geolocated_data[0].size - 1
135
+ @data[:countries] = geolocated_data.group_by { |x| x[country_col] }
136
+ elsif @options[:geolocation] && @data[:ips].size == 0
137
+ warn "Skipping geolocation: no IP found" if @options[:verbose]
138
+ @data[:countries] = {}
123
139
  else
140
+ warn "Skipping geolocation." if @options[:verbose]
124
141
  @data[:countries] = {}
125
142
  end
126
143
 
Binary file
@@ -0,0 +1,59 @@
1
+ module LogSense
2
+ module Apache
3
+ # parses a log line and returns a MatchData (named captures readable like a hash)
4
+ # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
5
+ #
6
+ # %h: IP
7
+ # %l: ident or -
8
+ # %u: userid or -
9
+ # %t: [10/Oct/2000:13:55:36 -0700]
10
+ # day = 2*digit
11
+ # month = 3*letter
12
+ # year = 4*digit
13
+ # hour = 2*digit
14
+ # minute = 2*digit
15
+ # second = 2*digit
16
+ # zone = (`+' | `-') 4*digit
17
+ # %r: GET /apache_pb.gif HTTP/1.0
18
+ # %{User-agent}: "
19
+ #
20
+ # Example
21
+ # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
22
+ #
23
+ class LogLineParser
24
+ DAY = /[0-9]{2}/
25
+ MONTH = /[A-Za-z]{3}/
26
+ YEAR = /[0-9]{4}/
27
+ TIMEC = /[0-9]{2}/
28
+ TIMEZONE = /(\+|-)[0-9]{4}/
29
+
30
+ IP = /(?<ip>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|::1|unknown)/
31
+ IDENT = /(?<ident>[^ ]+|-)/
32
+ USERID = /(?<userid>[^ ]+|-)/
33
+
34
+ TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
35
+
36
+ HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
37
+ WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
38
+ OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
39
+ METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
40
+ PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
41
+ URL = /(?<url>[^ ]+)/
42
+ REFERER = /(?<referer>[^"]*)/
43
+ RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
44
+ SIZE = /(?<size>[0-9]+|-)/
45
+ USER_AGENT = /(?<user_agent>[^"]*)/
46
+
47
+ attr_reader :format
48
+
49
+ def initialize
50
+ @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
51
+ end
52
+
53
+ def parse(line)
54
+ @format.match(line) ||
55
+ raise("Apache LogLine Parser Error: Could not parse #{line}")
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,101 @@
1
+ require "sqlite3"
2
+ require "browser"
3
+ require_relative "log_line_parser"
4
+
5
+ module LogSense
6
+ module Apache
7
+ #
8
+ # parse an Apache log file and return a SQLite3 DB
9
+ #
10
+ class LogParser
11
+ def parse(streams, options = {})
12
+ db = SQLite3::Database.new ":memory:"
13
+
14
+ db.execute "CREATE TABLE IF NOT EXISTS LogLine(
15
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
16
+ datetime TEXT,
17
+ ip TEXT,
18
+ user TEXT,
19
+ unique_visitor TEXT,
20
+ method TEXT,
21
+ path TEXT,
22
+ extension TEXT,
23
+ status TEXT,
24
+ size INTEGER,
25
+ referer TEXT,
26
+ user_agent TEXT,
27
+ bot INTEGER,
28
+ browser TEXT,
29
+ browser_version TEXT,
30
+ platform TEXT,
31
+ platform_version TEXT,
32
+ source_file TEXT,
33
+ line_number INTEGER
34
+ )"
35
+
36
+ ins = db.prepare("insert into LogLine (
37
+ datetime,
38
+ ip,
39
+ user,
40
+ unique_visitor,
41
+ method,
42
+ path,
43
+ extension,
44
+ status,
45
+ size,
46
+ referer,
47
+ user_agent,
48
+ bot,
49
+ browser,
50
+ browser_version,
51
+ platform,
52
+ platform_version,
53
+ source_file,
54
+ line_number
55
+ )
56
+ values (#{Array.new(18, '?').join(', ')})")
57
+
58
+ parser = LogLineParser.new
59
+
60
+ streams.each do |stream|
61
+ stream.readlines.each_with_index do |line, line_number|
62
+ begin
63
+ hash = parser.parse line
64
+ ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
65
+ ins.execute(
66
+ DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
67
+ hash[:ip],
68
+ hash[:userid],
69
+ unique_visitor_id(hash),
70
+ hash[:method],
71
+ hash[:url],
72
+ (hash[:url] ? File.extname(hash[:url]) : ''),
73
+ hash[:status],
74
+ hash[:size].to_i,
75
+ hash[:referer],
76
+ hash[:user_agent],
77
+ ua.bot? ? 1 : 0,
78
+ (ua.name || ''),
79
+ (ua.version || ''),
80
+ (ua.platform.name || ''),
81
+ (ua.platform.version || ''),
82
+ stream == $stdin ? "stdin" : stream.path,
83
+ line_number
84
+ )
85
+ rescue StandardError => e
86
+ warn e.message
87
+ end
88
+ end
89
+ end
90
+
91
+ db
92
+ end
93
+
94
+ private
95
+
96
+ def unique_visitor_id hash
97
+ "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
98
+ end
99
+ end
100
+ end
101
+ end
@@ -10,18 +10,18 @@ module LogSense
10
10
  #
11
11
  class Emitter
12
12
  CDN_CSS = [
13
- "https://cdnjs.cloudflare.com/ajax/libs/foundicons/3.0.0/foundation-icons.min.css",
14
- "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/css/foundation.min.css",
15
- "https://cdn.datatables.net/v/zf/dt-1.11.3/datatables.min.css"
13
+ # "https://cdnjs.cloudflare.com/ajax/libs/foundicons/3.0.0/foundation-icons.min.css",
14
+ "https://cdn.jsdelivr.net/npm/foundation-sites@6.8.1/dist/css/foundation.min.css",
15
+ "https://cdn.datatables.net/v/zf/dt-2.0.8/datatables.min.css"
16
16
  ].freeze
17
17
 
18
18
  CDN_JS = [
19
- "https://code.jquery.com/jquery-3.6.2.min.js",
20
- "https://cdn.datatables.net/v/zf/dt-1.13.1/datatables.min.js",
21
- "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/js/foundation.min.js",
22
- "https://cdn.jsdelivr.net/npm/vega@5.22.1",
23
- "https://cdn.jsdelivr.net/npm/vega-lite@5.6.0",
24
- "https://cdn.jsdelivr.net/npm/vega-embed@6.21.0"
19
+ "https://code.jquery.com/jquery-3.7.1.min.js",
20
+ "https://cdn.datatables.net/v/zf/dt-2.0.8/datatables.min.js",
21
+ "https://cdn.jsdelivr.net/npm/foundation-sites@6.8.1/dist/js/foundation.min.js",
22
+ "https://cdn.jsdelivr.net/npm/vega@5.28.0",
23
+ "https://cdn.jsdelivr.net/npm/vega-lite@5.18.1",
24
+ "https://cdn.jsdelivr.net/npm/vega-embed@6.25.0"
25
25
  ].freeze
26
26
 
27
27
  def self.emit(reports = {}, data = {}, options = {})
@@ -66,21 +66,27 @@ module LogSense
66
66
  end
67
67
  end
68
68
 
69
- def self.escape_javascript(string)
70
- js_escape_map = {
71
- #"&" => "&amp;",
72
- #"%" => "&#37;",
73
- "<" => "&lt;",
74
- "\\" => "&bsol;",
75
- '"' => ' \\"',
76
- "'" => " \\'",
77
- "`" => " \\`",
78
- "$" => " \\$"
79
- }
80
- js_escape_map.each do |match, replace|
81
- string = string.gsub(match, replace)
69
+ # taken from Ruby on Rails
70
+ JS_ESCAPE_MAP = {
71
+ "\\" => "\\\\",
72
+ "</" => '<\/',
73
+ "\r\n" => '\n',
74
+ "\n" => '\n',
75
+ "\r" => '\n',
76
+ '"' => '\\"',
77
+ "'" => "\\'",
78
+ "`" => "\\`",
79
+ "$" => "\\$"
80
+ }
81
+
82
+ # taken from Ruby on Rails
83
+ def self.escape_javascript(javascript)
84
+ javascript = javascript.to_s
85
+ if javascript.empty?
86
+ ""
87
+ else
88
+ javascript.gsub(/(\\|<\/|\r\n|\342\200\250|\342\200\251|[\n\r"']|[`]|[$])/u, JS_ESCAPE_MAP)
82
89
  end
83
- string
84
90
  end
85
91
 
86
92
  def self.slugify(string)
@@ -46,7 +46,7 @@ module LogSense
46
46
  end
47
47
 
48
48
  def merge(parser_db)
49
- ipdb = Sqlite3::Database.open DB_FILE
49
+ Sqlite3::Database.open DB_FILE
50
50
  parser_db
51
51
  end
52
52
 
@@ -75,11 +75,14 @@ module LogSense
75
75
  def self.geolocate(data)
76
76
  @location_db = IpLocator.load_db
77
77
 
78
- data[:ips].each do |line|
79
- country_code = IpLocator.locate_ip line[0], @location_db
80
- line << country_code
78
+ data[:ips].map do |line|
79
+ begin
80
+ country_code = IpLocator.locate_ip line[0], @location_db
81
+ line + [country_code]
82
+ rescue
83
+ line + ["INVALID IP"]
84
+ end
81
85
  end
82
- data
83
86
  end
84
87
  end
85
88
  end
@@ -0,0 +1,26 @@
1
+ module LogSense
2
+ #
3
+ # Check options and return appropriate error if
4
+ # combinations of command arguments are wrong
5
+ #
6
+ module Options
7
+ module Checker
8
+ SUPPORTED_CHAINS = {
9
+ rails: %i[txt html sqlite3 ufw],
10
+ apache: %i[txt html sqlite3 ufw]
11
+ }.freeze
12
+
13
+ def self.compatible?(iformat, oformat)
14
+ (SUPPORTED_CHAINS[iformat.to_sym] || []).include? oformat.to_sym
15
+ end
16
+
17
+ def self.chains_to_s
18
+ string = ""
19
+ SUPPORTED_CHAINS.each do |iformat, oformat|
20
+ string << "- #{iformat}: #{oformat.join(", ")}\n"
21
+ end
22
+ string
23
+ end
24
+ end
25
+ end
26
+ end