log_sense 1.6.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0a915af429fe2492f17bc767c86767e38d37a674af6206fb3b2c0a76e4cad620
4
- data.tar.gz: bf4cc3a1e438b752c9c99fee5f91c85abd38de95dac947c2382e2f9825b51467
3
+ metadata.gz: 126257c949e11f090cc1928a1458572529f71c44a5c20baae35861241dfa7b7b
4
+ data.tar.gz: 33a1ee650598a90ca9adb6a6c2795746dfe8a7735414b22b930d3806b205b318
5
5
  SHA512:
6
- metadata.gz: 5612ef5474aa397132527588d289d9d1eba4f8954b92553e32fd5b856f2bd5441ba09d89d1bf12a0f220c602dcd2e73de2d6e360b6177b162d57adc2726442c9
7
- data.tar.gz: c3b546cc177a3364b1b513f4274c1521aa1669bb17b142eb453ba73251f1e3458e7b9d1703b692ef275a474821ce7375e387155f63e3e7d5d7c9c42e1c50a150
6
+ metadata.gz: f135c70480994434dea0b5ff11bac5b1239d071ae38afeda7dd3672a43e127bba06136d550f29844c60dd2d2640a87c57f7fda683240512d1fe37794d124a433
7
+ data.tar.gz: 65abbe86864aba7d9e6499e6f3ec4a0ef2facf00505ea8611eec3b44279772641bf3a4374045acac0892c8bde19ce37c74a88501fa7905342b1e30032f2b50b4
data/CHANGELOG.org CHANGED
@@ -2,6 +2,18 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.7.0
6
+
7
+ - [User] Fixes a bug with the geolocator
8
+ - [User] Fixes a bug causing a crash when no country was found by the geolocator
9
+ - [User] Fixes bugs related to corner cases (empty logs, wrong parser for log,
10
+ empty geolocation data)
11
+ - [User] Updated DB-IP country file to Jun 2024 version.
12
+ - [User] Refreshed the style a bit, removed Fira Sans and updated versions of
13
+ CSS and JS frameworks
14
+ - [Code] Move options and some code in their own dir
15
+ - [Code] Add rendering view parsing (useful in development; no views yet)
16
+
5
17
  * 1.6.1
6
18
 
7
19
  - Country DB now stores country name.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- log_sense (1.5.3)
4
+ log_sense (1.7.0)
5
5
  browser
6
6
  ipaddr
7
7
  iso_country_codes
@@ -12,24 +12,30 @@ GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
14
  browser (5.3.1)
15
- debug (1.6.2)
16
- irb (>= 1.3.6)
17
- reline (>= 0.3.1)
18
- io-console (0.5.11)
19
- ipaddr (1.2.5)
20
- irb (1.4.1)
21
- reline (>= 0.3.0)
15
+ debug (1.9.2)
16
+ irb (~> 1.10)
17
+ reline (>= 0.3.8)
18
+ io-console (0.7.2)
19
+ ipaddr (1.2.6)
20
+ irb (1.13.1)
21
+ rdoc (>= 4.0.0)
22
+ reline (>= 0.4.2)
22
23
  iso_country_codes (0.7.8)
23
- mini_portile2 (2.8.0)
24
- minitest (5.15.0)
24
+ mini_portile2 (2.8.7)
25
+ minitest (5.23.1)
26
+ psych (5.1.2)
27
+ stringio
25
28
  rake (12.3.3)
26
- reline (0.3.1)
29
+ rdoc (6.7.0)
30
+ psych (>= 4.0.0)
31
+ reline (0.5.8)
27
32
  io-console (~> 0.5)
28
- sqlite3 (1.5.4)
33
+ sqlite3 (2.0.2)
29
34
  mini_portile2 (~> 2.8.0)
35
+ stringio (3.1.0)
30
36
  terminal-table (3.0.2)
31
37
  unicode-display_width (>= 1.1.1, < 3)
32
- unicode-display_width (2.3.0)
38
+ unicode-display_width (2.5.0)
33
39
 
34
40
  PLATFORMS
35
41
  ruby
@@ -41,4 +47,4 @@ DEPENDENCIES
41
47
  rake (~> 12.0)
42
48
 
43
49
  BUNDLED WITH
44
- 2.3.3
50
+ 2.5.3
data/Rakefile CHANGED
@@ -8,9 +8,9 @@ end
8
8
 
9
9
  require_relative './lib/log_sense/ip_locator.rb'
10
10
 
11
- desc "Convert Geolocation DB to sqlite"
11
+ desc "Convert Geolocation DB to sqlite (arg YYYY_MM or filename)"
12
12
  task :dbip, [:filename] do |tasks, args|
13
- filename_or_yyyy_mm = args[:filename]
13
+ filename_or_yyyy_mm = args[:filename] || ""
14
14
 
15
15
  filename = if /\d{4}-\d{2}/.match(filename_or_yyyy_mm)
16
16
  "ip_locations/dbip-country-lite-#{filename_or_yyyy_mm}.csv"
@@ -18,13 +18,15 @@ task :dbip, [:filename] do |tasks, args|
18
18
  filename_or_yyyy_mm
19
19
  end
20
20
 
21
- # if the filename has a .gz extension or a gzipped version of the file
22
- # exists, gunzip it
21
+ # if the filename passed as argument has a .gz extension or a gzipped version
22
+ # of the file passed as argument exists, gunzip it
23
23
  if File.extname(filename) == ".gz" || File.exist?("#{filename}.gz")
24
24
  system "gunzip #{filename}.gz"
25
25
  end
26
26
 
27
- if !File.exist? filename
27
+ if File.exist? filename
28
+ LogSense::IpLocator::dbip_to_sqlite filename
29
+ else
28
30
  puts <<-EOS
29
31
  Error. Could not find: #{filename}
30
32
 
@@ -37,13 +39,10 @@ I see the following files:
37
39
  3. Relaunch with YYYY-MM (will build: dbip-country-lite-YYYY-MM.csv)
38
40
  or with filename.
39
41
 
40
- Remark. If the filename has the extension .gz or if the
41
- filename does not exist, but a file with the same name and .gz extension
42
- exists, it is gunzipped first
42
+ Remark. If the filename has the extension .gz or if the filename does not exist,
43
+ but a file with the same name and .gz extension exists, it is gunzipped first
43
44
  EOS
44
45
 
45
46
  exit
46
- else
47
- LogSense::IpLocator::dbip_to_sqlite filename
48
47
  end
49
48
  end
data/exe/log_sense CHANGED
@@ -9,7 +9,7 @@ require "sqlite3"
9
9
 
10
10
  # this better be here... OptionsParser consumes ARGV
11
11
  @command_line = ARGV.join(" ")
12
- @options = LogSense::OptionsParser.parse ARGV
12
+ @options = LogSense::Options::Parser.parse ARGV
13
13
  @input_filenames = @options[:input_filenames] + ARGV
14
14
  @output_filename = @options[:output_filename]
15
15
 
@@ -20,33 +20,36 @@ require "sqlite3"
20
20
  #
21
21
  # Check input files
22
22
  #
23
- @non_existing = @input_filenames.reject { |x| File.exist?(x) }
24
23
 
24
+ @non_existing = @input_filenames.reject { |x| File.exist?(x) }
25
25
  if @non_existing.any?
26
26
  warn "Error: some input file(s) \"#{@non_existing.join(", ")}\" do not exist"
27
27
  exit 1
28
28
  end
29
29
 
30
- #
31
- # Special condition: sqlite3 requires a single file as input
32
- #
33
- if @input_filenames.size > 0 &&
34
- File.extname(@input_filenames.first) == "sqlite3" &&
35
- @input_filenames.size > 1
36
- warn "Error: you can pass only one sqlite3 file as input"
30
+ @sqlite3_files = @input_filenames.select { |x| File.extname(x).include?("sqlite") }
31
+ if @sqlite3_files.any? && @input_filenames.size != 1
32
+ warn "Error: when passing an SQLite3 DB, this has to be the only input file"
37
33
  exit 1
38
34
  end
39
35
 
36
+ #
37
+ # Check output files
38
+ #
39
+
40
+ # Nothing to be done, here, since we output to STDOUT if no output filename is
41
+ # specified
42
+
40
43
  #
41
44
  # Supported input/output chains
42
45
  #
43
46
  iformat = @options[:input_format]
44
47
  oformat = @options[:output_format]
45
48
 
46
- if !LogSense::OptionsChecker::compatible?(iformat, oformat)
49
+ if !LogSense::Options::Checker.compatible?(iformat, oformat)
47
50
  warn "Error: don't know how to make #{iformat} into #{oformat}."
48
51
  warn "Possible transformation chains:"
49
- warn LogSense::OptionsChecker.chains_to_s
52
+ warn LogSense::Options::Checker.chains_to_s
50
53
  exit 1
51
54
  end
52
55
 
@@ -56,8 +59,11 @@ end
56
59
 
57
60
  @started_at = Time.now
58
61
 
59
- if @input_filenames.size > 0 &&
60
- File.extname(@input_filenames.first) == ".sqlite3"
62
+ #
63
+ # Input
64
+ #
65
+
66
+ if @input_filenames.size > 0 && File.extname(@input_filenames.first) == ".sqlite3"
61
67
  warn "Reading SQLite3 DB ..." if @options[:verbose]
62
68
  @db = SQLite3::Database.open @input_filenames.first
63
69
  else
@@ -67,12 +73,19 @@ else
67
73
  else
68
74
  @input_filenames.map { |fname| File.open(fname, "r") }
69
75
  end
70
- class_name = "LogSense::#{@options[:input_format].capitalize}LogParser"
76
+
77
+ class_name = "LogSense::#{@options[:input_format].capitalize}::LogParser"
71
78
  parser_class = Object.const_get class_name
72
79
  parser = parser_class.new
73
80
  @db = parser.parse @input_files
74
81
  end
75
82
 
83
+ #
84
+ # Output
85
+ #
86
+
87
+ # TODO this code could benefit from some classes abstracting the work a bit
88
+
76
89
  if @options[:output_format] == "sqlite3"
77
90
  warn "Saving SQLite3 DB ..." if @options[:verbose]
78
91
 
@@ -83,7 +96,7 @@ if @options[:output_format] == "sqlite3"
83
96
 
84
97
  exit 0
85
98
  elsif @options[:output_format] == "ufw"
86
- pattern = @options[:pattern] || "php"
99
+ pattern = @options[:pattern]
87
100
 
88
101
  if @options[:input_format] == "rails"
89
102
  query = "select distinct event.ip,event.url
@@ -113,14 +126,18 @@ else
113
126
  aggr = aggr_class.new(@db, @options)
114
127
  @data = aggr.aggregate
115
128
 
116
- if @options[:geolocation]
129
+ if @options[:geolocation] && @data[:ips].size != 0
117
130
  warn "Geolocating ..." if @options[:verbose]
118
- @data = LogSense::IpLocator.geolocate @data
131
+ geolocated_data = LogSense::IpLocator.geolocate @data
119
132
 
120
133
  warn "Grouping IPs by country ..." if @options[:verbose]
121
- country_col = @data[:ips][0].size - 1
122
- @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
134
+ country_col = geolocated_data[0].size - 1
135
+ @data[:countries] = geolocated_data.group_by { |x| x[country_col] }
136
+ elsif @options[:geolocation] && @data[:ips].size == 0
137
+ warn "Skipping geolocation: no IP found" if @options[:verbose]
138
+ @data[:countries] = {}
123
139
  else
140
+ warn "Skipping geolocation." if @options[:verbose]
124
141
  @data[:countries] = {}
125
142
  end
126
143
 
Binary file
@@ -0,0 +1,59 @@
module LogSense
  module Apache
    # Parses one line of an Apache access log ("combined" format) and
    # returns the MatchData, whose named captures can be read like a hash.
    #
    # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
    #
    # %h: IP
    # %l: ident or -
    # %u: userid or -
    # %t: [10/Oct/2000:13:55:36 -0700]
    #     day = 2*digit
    #     month = 3*letter
    #     year = 4*digit
    #     hour = 2*digit
    #     minute = 2*digit
    #     second = 2*digit
    #     zone = (`+' | `-') 4*digit
    # %r: GET /apache_pb.gif HTTP/1.0
    # %{User-agent}: "
    #
    # Example
    # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
    #
    # Named captures: ip, ident, userid, date, time, method, url, protocol,
    # status, size, referer, user_agent.  The captures method, url and
    # protocol are nil when the request field is "-" or does not have the
    # "METHOD URL PROTOCOL" shape (it is then matched by the catch-all
    # alternative of the request group).
    #
    class LogLineParser
      DAY = /[0-9]{2}/
      MONTH = /[A-Za-z]{3}/
      YEAR = /[0-9]{4}/
      TIMEC = /[0-9]{2}/
      TIMEZONE = /(\+|-)[0-9]{4}/

      IP = /(?<ip>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|::1|unknown)/
      IDENT = /(?<ident>[^ ]+|-)/
      USERID = /(?<userid>[^ ]+|-)/

      TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/

      HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
      WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
      # The dot of "robots.txt" is escaped so that it only matches a literal
      # dot (unescaped, it matched any character).
      OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots\.txt/
      METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
      PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
      URL = /(?<url>[^ ]+)/
      REFERER = /(?<referer>[^"]*)/
      RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
      SIZE = /(?<size>[0-9]+|-)/
      USER_AGENT = /(?<user_agent>[^"]*)/

      # Regexp for a whole log line.  It is built once, when the class is
      # loaded, rather than on every call to +new+.
      FORMAT = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/.freeze

      # The Regexp used by #parse.
      attr_reader :format

      def initialize
        @format = FORMAT
      end

      # Matches +line+ against the log format.
      #
      # Returns the MatchData on success; raises a RuntimeError whose
      # message includes the offending line when the line does not match.
      def parse(line)
        @format.match(line) ||
          raise("Apache LogLine Parser Error: Could not parse #{line}")
      end
    end
  end
end
@@ -0,0 +1,101 @@
require "date"
require "sqlite3"
require "browser"
require_relative "log_line_parser"

module LogSense
  module Apache
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    class LogParser
      # Schema of the table holding one row per successfully parsed log line.
      CREATE_TABLE_SQL = <<~SQL
        CREATE TABLE IF NOT EXISTS LogLine(
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          datetime TEXT,
          ip TEXT,
          user TEXT,
          unique_visitor TEXT,
          method TEXT,
          path TEXT,
          extension TEXT,
          status TEXT,
          size INTEGER,
          referer TEXT,
          user_agent TEXT,
          bot INTEGER,
          browser TEXT,
          browser_version TEXT,
          platform TEXT,
          platform_version TEXT,
          source_file TEXT,
          line_number INTEGER
        )
      SQL

      # Columns filled by the insert statement, in the order in which
      # #row_values produces the corresponding values.
      INSERT_COLUMNS = %w[
        datetime ip user unique_visitor method path extension status size
        referer user_agent bot browser browser_version platform
        platform_version source_file line_number
      ].freeze

      # The placeholders are derived from the column list, so that the two
      # cannot get out of sync.
      INSERT_SQL = "insert into LogLine (#{INSERT_COLUMNS.join(', ')}) " \
                   "values (#{Array.new(INSERT_COLUMNS.size, '?').join(', ')})".freeze

      # Reads every line of every stream and stores the parsed fields in the
      # LogLine table of a fresh in-memory SQLite3 database.
      #
      # streams:: enumerable of IO-like objects; $stdin is recorded as
      #           "stdin", any other stream is recorded with its #path
      # options:: accepted for interface compatibility; not used here
      #
      # Lines which cannot be parsed or inserted are reported with +warn+
      # and skipped (best effort).  Returns the SQLite3::Database.
      def parse(streams, options = {})
        db = SQLite3::Database.new ":memory:"
        db.execute CREATE_TABLE_SQL

        ins = db.prepare(INSERT_SQL)
        parser = LogLineParser.new

        begin
          streams.each do |stream|
            # each_line streams the input instead of loading it all in
            # memory; line_number is zero-based, as with each_with_index
            stream.each_line.with_index do |line, line_number|
              begin
                ins.execute(*row_values(parser.parse(line), stream, line_number))
              rescue StandardError => e
                warn e.message
              end
            end
          end
        ensure
          # release the prepared statement; the database itself stays open
          ins.close
        end

        db
      end

      private

      # Builds the values of one LogLine row, in INSERT_COLUMNS order, from
      # the named captures of a parsed log line.
      def row_values(hash, stream, line_number)
        ua = Browser.new(hash[:user_agent], accept_language: "en-us")

        [
          DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
          hash[:ip],
          hash[:userid],
          unique_visitor_id(hash),
          hash[:method],
          hash[:url],
          (hash[:url] ? File.extname(hash[:url]) : ""),
          hash[:status],
          hash[:size].to_i, # "-" (no size logged) is stored as 0
          hash[:referer],
          hash[:user_agent],
          ua.bot? ? 1 : 0,
          (ua.name || ""),
          (ua.version || ""),
          (ua.platform.name || ""),
          (ua.platform.version || ""),
          stream == $stdin ? "stdin" : stream.path,
          line_number
        ]
      end

      # Identifies a visitor by the combination of date, IP and user agent.
      def unique_visitor_id(hash)
        "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
      end
    end
  end
end
@@ -10,18 +10,18 @@ module LogSense
10
10
  #
11
11
  class Emitter
12
12
  CDN_CSS = [
13
- "https://cdnjs.cloudflare.com/ajax/libs/foundicons/3.0.0/foundation-icons.min.css",
14
- "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/css/foundation.min.css",
15
- "https://cdn.datatables.net/v/zf/dt-1.11.3/datatables.min.css"
13
+ # "https://cdnjs.cloudflare.com/ajax/libs/foundicons/3.0.0/foundation-icons.min.css",
14
+ "https://cdn.jsdelivr.net/npm/foundation-sites@6.8.1/dist/css/foundation.min.css",
15
+ "https://cdn.datatables.net/v/zf/dt-2.0.8/datatables.min.css"
16
16
  ].freeze
17
17
 
18
18
  CDN_JS = [
19
- "https://code.jquery.com/jquery-3.6.2.min.js",
20
- "https://cdn.datatables.net/v/zf/dt-1.13.1/datatables.min.js",
21
- "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/js/foundation.min.js",
22
- "https://cdn.jsdelivr.net/npm/vega@5.22.1",
23
- "https://cdn.jsdelivr.net/npm/vega-lite@5.6.0",
24
- "https://cdn.jsdelivr.net/npm/vega-embed@6.21.0"
19
+ "https://code.jquery.com/jquery-3.7.1.min.js",
20
+ "https://cdn.datatables.net/v/zf/dt-2.0.8/datatables.min.js",
21
+ "https://cdn.jsdelivr.net/npm/foundation-sites@6.8.1/dist/js/foundation.min.js",
22
+ "https://cdn.jsdelivr.net/npm/vega@5.28.0",
23
+ "https://cdn.jsdelivr.net/npm/vega-lite@5.18.1",
24
+ "https://cdn.jsdelivr.net/npm/vega-embed@6.25.0"
25
25
  ].freeze
26
26
 
27
27
  def self.emit(reports = {}, data = {}, options = {})
@@ -66,21 +66,27 @@ module LogSense
66
66
  end
67
67
  end
68
68
 
# Replacement for each sequence matched by JS_ESCAPE_PATTERN
# (taken from Ruby on Rails).  Every alternative of the pattern needs an
# entry here: gsub replaces a match which has no key in the map with the
# empty string, which would silently drop the character.
JS_ESCAPE_MAP = {
  "\\" => "\\\\",
  "</" => '<\/',
  "\r\n" => '\n',
  "\n" => '\n',
  "\r" => '\n',
  '"' => '\\"',
  "'" => "\\'",
  "`" => "\\`",
  "$" => "\\$",
  "\u2028" => "&#x2028;",
  "\u2029" => "&#x2029;"
}.freeze

# Sequences which need escaping; U+2028 and U+2029 are the Unicode line and
# paragraph separators.
JS_ESCAPE_PATTERN = /(\\|<\/|\r\n|\u2028|\u2029|[\n\r"']|[`]|[$])/u

# Escapes backslashes, "</", newlines, quotes, backticks, dollar signs and
# the Unicode line/paragraph separators in +javascript+ (taken from Ruby on
# Rails).
#
# The argument is converted with +to_s+, so that nil yields "".
# Returns a new String.
def self.escape_javascript(javascript)
  javascript = javascript.to_s
  return "" if javascript.empty?

  javascript.gsub(JS_ESCAPE_PATTERN, JS_ESCAPE_MAP)
end
85
91
 
86
92
  def self.slugify(string)
@@ -46,7 +46,7 @@ module LogSense
46
46
  end
47
47
 
48
48
  def merge(parser_db)
49
- ipdb = Sqlite3::Database.open DB_FILE
49
+ Sqlite3::Database.open DB_FILE
50
50
  parser_db
51
51
  end
52
52
 
@@ -75,11 +75,14 @@ module LogSense
75
75
  def self.geolocate(data)
76
76
  @location_db = IpLocator.load_db
77
77
 
78
- data[:ips].each do |line|
79
- country_code = IpLocator.locate_ip line[0], @location_db
80
- line << country_code
78
+ data[:ips].map do |line|
79
+ begin
80
+ country_code = IpLocator.locate_ip line[0], @location_db
81
+ line + [country_code]
82
+ rescue
83
+ line + ["INVALID IP"]
84
+ end
81
85
  end
82
- data
83
86
  end
84
87
  end
85
88
  end
@@ -0,0 +1,26 @@
module LogSense
  #
  # Check options and return appropriate error if
  # combinations of command arguments are wrong
  #
  module Options
    module Checker
      # Output formats which can be produced from each input format.
      SUPPORTED_CHAINS = {
        rails: %i[txt html sqlite3 ufw].freeze,
        apache: %i[txt html sqlite3 ufw].freeze
      }.freeze

      # Tells whether +oformat+ can be generated from +iformat+.
      #
      # Both arguments may be strings or symbols.  Unknown or missing (nil)
      # formats are never compatible: the method returns false rather than
      # raising.
      def self.compatible?(iformat, oformat)
        SUPPORTED_CHAINS.fetch(iformat.to_s.to_sym, []).include?(oformat.to_s.to_sym)
      end

      # Returns a printable list of the supported chains: one
      # "- input: output, output, ..." line, terminated by a newline, per
      # input format.
      def self.chains_to_s
        SUPPORTED_CHAINS.map do |iformat, oformats|
          "- #{iformat}: #{oformats.join(', ')}\n"
        end.join
      end
    end
  end
end