log_sense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/CHANGELOG.org +27 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +37 -0
- data/LICENSE.txt +21 -0
- data/README.org +114 -0
- data/Rakefile +15 -0
- data/alr-styles.css +61 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/log_sense +66 -0
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache_data_cruncher.rb +131 -0
- data/lib/log_sense/apache_log_parser.rb +87 -0
- data/lib/log_sense/emitter.rb +49 -0
- data/lib/log_sense/ip_locator.rb +55 -0
- data/lib/log_sense/options_parser.rb +86 -0
- data/lib/log_sense/rails_data_cruncher.rb +117 -0
- data/lib/log_sense/rails_log_parser.rb +176 -0
- data/lib/log_sense/templates/#apache.org.erb# +266 -0
- data/lib/log_sense/templates/.#apache.org.erb +1 -0
- data/lib/log_sense/templates/_output_table.html.erb +25 -0
- data/lib/log_sense/templates/apache.html.erb +328 -0
- data/lib/log_sense/templates/apache.org.erb +266 -0
- data/lib/log_sense/templates/rails.txt.erb +39 -0
- data/lib/log_sense/version.rb +3 -0
- data/lib/log_sense.rb +8 -0
- data/log_sense.gemspec +39 -0
- metadata +189 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6c9423e2199ce12c9ebf30af86a3b9f2ee2edf63de1555f12dbe09c8b798179e
|
4
|
+
data.tar.gz: 59ccd77c5a7d65943705f5b15a4ec8427f12c63ae9b1468b16e46bfe36cd53ce
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1f4dadc41047040dc2e36a91b55e2f9d5d759556d8e5d8a77557d3088eda873f8ac4dc3968f7d0617b6aefa5b5507fbe1008f1baaf5aa67ab430bedb0b858bf2
|
7
|
+
data.tar.gz: 699e717c1118196b7cf90bcee9774f85b3e298ce15e1e1bceb0715a7bcd5094c9c933ee416663a0a28eb1f3b3bc084fb15060a6a6fb3d8bdfb19b2c44d61bf60
|
data/.gitignore
ADDED
data/CHANGELOG.org
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#+TITLE: ChangeLog
|
2
|
+
#+AUTHOR: Adolfo Villafiorita
|
3
|
+
#+STARTUP: showall
|
4
|
+
|
5
|
+
* Unreleased
|
6
|
+
|
7
|
+
These changes are in the repository but not yet released to RubyGems.
|
8
|
+
|
9
|
+
** New Functions and Changes
|
10
|
+
|
11
|
+
** Fixes
|
12
|
+
|
13
|
+
** Documentation
|
14
|
+
|
15
|
+
** Code
|
16
|
+
|
17
|
+
|
18
|
+
* Version 1.0.0
|
19
|
+
|
20
|
+
** New Functions and Changes
|
21
|
+
|
22
|
+
** Fixes
|
23
|
+
|
24
|
+
** Documentation
|
25
|
+
|
26
|
+
** Code
|
27
|
+
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
log_sense (1.2.0)
|
5
|
+
apache_log-parser
|
6
|
+
browser
|
7
|
+
ipaddr
|
8
|
+
iso_country_codes
|
9
|
+
sqlite3
|
10
|
+
terminal-table
|
11
|
+
|
12
|
+
GEM
|
13
|
+
remote: https://rubygems.org/
|
14
|
+
specs:
|
15
|
+
apache_log-parser (3.1.2)
|
16
|
+
browser (5.3.1)
|
17
|
+
byebug (11.1.3)
|
18
|
+
ipaddr (1.2.3)
|
19
|
+
iso_country_codes (0.7.8)
|
20
|
+
minitest (5.14.4)
|
21
|
+
rake (12.3.3)
|
22
|
+
sqlite3 (1.4.2)
|
23
|
+
terminal-table (3.0.2)
|
24
|
+
unicode-display_width (>= 1.1.1, < 3)
|
25
|
+
unicode-display_width (2.1.0)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
byebug
|
32
|
+
log_sense!
|
33
|
+
minitest
|
34
|
+
rake (~> 12.0)
|
35
|
+
|
36
|
+
BUNDLED WITH
|
37
|
+
2.2.29
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 Adolfo Villafiorita
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.org
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
#+TITLE: README
|
2
|
+
#+AUTHOR: Adolfo Villafiorita
|
3
|
+
#+STARTUP: showall
|
4
|
+
|
5
|
+
* Introduction
|
6
|
+
|
7
|
+
LogSense generates reports and statistics from Apache web logs in the
|
8
|
+
=combined= format and from Rails logs. Written in Ruby, it runs from
|
9
|
+
the command line, it is fast, and it can be installed on any system
|
10
|
+
which supports Ruby.
|
11
|
+
|
12
|
+
LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]]
|
13
|
+
and [[https://umami.is/][Umami]], focusing on privacy and data-ownership: the data
|
14
|
+
generated by LogSense is stored on your computer and owned by
|
15
|
+
you (like it should be).
|
16
|
+
|
17
|
+
LogSense is also inspired by static websites generators:
|
18
|
+
statistics are generated from the command line and accessed as static
|
19
|
+
HTML files. By generating static resources, LogSense
|
20
|
+
significantly reduces the attack surface of your webserver and
|
21
|
+
installation headaches.
|
22
|
+
|
23
|
+
We have, for instance, a cron job running on our servers, generating
|
24
|
+
statistics at night. The generated files are then made available on a
|
25
|
+
private area on the web.
|
26
|
+
|
27
|
+
Statistics are generated from Apache log formats in the =combined=
|
28
|
+
format and from Rails logs. Reports are tailored, but not limited, to
|
29
|
+
web servers serving static websites.  No need to install JavaScript
|
30
|
+
code on your websites, no cookies installed, no user tracking.
|
31
|
+
|
32
|
+
LogSense reports the following data:
|
33
|
+
|
34
|
+
- Visitors, hits, unique visitors, bandwidth used
|
35
|
+
- Most accessed HTML pages
|
36
|
+
- Most accessed resources
|
37
|
+
- Response statuses
|
38
|
+
- Referers
|
39
|
+
- OS, browsers, and devices
|
40
|
+
- IP Country location, thanks to the DB-IP lite country DB
|
41
|
+
- Streaks: resources accessed by a given IP over time
|
42
|
+
- Potential attacks: access to resources which are not meant to be
|
43
|
+
served by a web server serving static websites
|
44
|
+
- Performance of Rails requests
|
45
|
+
|
46
|
+
Filters from the command line allow you to analyze specific periods and
|
47
|
+
distinguish traffic generated by self polls and crawlers.
|
48
|
+
|
49
|
+
LogSense generates HTML, txt (Org Mode), and SQLite outputs.
|
50
|
+
|
51
|
+
* Installation
|
52
|
+
|
53
|
+
#+begin_src bash
|
54
|
+
gem install log_sense
|
55
|
+
#+end_src
|
56
|
+
|
57
|
+
* Usage
|
58
|
+
|
59
|
+
#+begin_src bash :results raw output :wrap example
|
60
|
+
log_sense --help
|
61
|
+
#+end_src
|
62
|
+
|
63
|
+
#+RESULTS:
|
64
|
+
#+begin_example
|
65
|
+
Usage: apache_log_report [options] [logfile]
|
66
|
+
-l, --limit=N Number of entries to show (defaults to 30)
|
67
|
+
-b, --begin=DATE Consider entries after or on DATE
|
68
|
+
-e, --end=DATE Consider entries before or on DATE
|
69
|
+
-i, --ignore-crawlers Ignore crawlers
|
70
|
+
-p, --ignore-selfpoll Ignore apaches self poll entries (from ::1)
|
71
|
+
--only-crawlers Perform analysis on crawlers only
|
72
|
+
-u, --prefix=PREFIX Prefix to add to all plots (used to run multiple analyses in the same dir)
|
73
|
+
-w, --suffix=SUFFIX Suffix to add to all plots (used to run multiple analyses in the same dir)
|
74
|
+
-c, --code-export=WHAT Control :export directive in Org Mode code blocks (code, results, *both*, none)
|
75
|
+
-f, --format=FORMAT Output format: html, org, sqlite. Defaults to org mode
|
76
|
+
-v, --version Prints version information
|
77
|
+
-h, --help Prints this help
|
78
|
+
This is version 1.1.6
|
79
|
+
#+end_example
|
80
|
+
|
81
|
+
* Change Log
|
82
|
+
|
83
|
+
See the [[file:CHANGELOG.org][CHANGELOG]] file.
|
84
|
+
|
85
|
+
* Compatibility
|
86
|
+
|
87
|
+
LogSense should run on any system on which Ruby runs.
|
88
|
+
|
89
|
+
Concerning the outputs:
|
90
|
+
|
91
|
+
- The HTML report uses [[https://picturepan2.github.io/spectre/][Spectre.css]] and (will use) [[https://vega.github.io/vega-lite/][Vega-Lite]], which
|
92
|
+
are downloaded from a CDN
|
93
|
+
- The textual format is compatible with Org Mode and can be further
|
94
|
+
processed to any format Org Mode can be exported to (including HTML
|
95
|
+
and PDF).
|
96
|
+
|
97
|
+
* Author and Contributors
|
98
|
+
|
99
|
+
[[http://ict4g.net/adolfo][Adolfo Villafiorita]].
|
100
|
+
|
101
|
+
* Known Bugs
|
102
|
+
|
103
|
+
Some known bugs and an unknown number of unknown bugs.
|
104
|
+
|
105
|
+
(See the open issues for the known bugs.)
|
106
|
+
|
107
|
+
* License
|
108
|
+
|
109
|
+
Distributed under the terms of the [[http://opensource.org/licenses/MIT][MIT License]].
|
110
|
+
|
111
|
+
Geolocation is made possible by the DB-IP.com IP to Country Lite database, released under
|
112
|
+
a CC license.
|
113
|
+
|
114
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Rake tasks for the gem: Bundler's gem tasks, the test task, and a helper
# task that converts the geolocation database to SQLite.
require "bundler/gem_tasks"

# Rake::TestTask (below) defines a task named `test`; this Rakefile defines
# no `spec` task, so the default has to point at `test`.
task :default => :test

require 'rake/testtask'
Rake::TestTask.new do |t|
  t.libs << 'test'
end

require_relative './lib/log_sense/ip_locator.rb'

desc "Convert Geolocation DB to sqlite"
task :dbip_to_sqlite3, [:filename] do |_task, args|
  filename = args[:filename]
  # The gem's namespace is LogSense (the executable calls LogSense::IpLocator);
  # ApacheLogReport is a stale module name.
  LogSense::IpLocator::dbip_to_sqlite filename
end
|
data/alr-styles.css
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
nav {
|
2
|
+
position: fixed;
|
3
|
+
}
|
4
|
+
|
5
|
+
section {
|
6
|
+
margin-left: 250px;
|
7
|
+
}
|
8
|
+
|
9
|
+
article {
|
10
|
+
margin-top: 1rem;
|
11
|
+
}
|
12
|
+
|
13
|
+
h1, h2 {
|
14
|
+
color: #222222 !important;
|
15
|
+
}
|
16
|
+
|
17
|
+
/*
|
18
|
+
table {
|
19
|
+
border: 1px solid #222222;
|
20
|
+
border-collapse: collapse;
|
21
|
+
}
|
22
|
+
*/
|
23
|
+
|
24
|
+
table th {
|
25
|
+
background: #444444;
|
26
|
+
color: white;
|
27
|
+
}
|
28
|
+
|
29
|
+
.ip {
|
30
|
+
vertical-align: top;
|
31
|
+
}
|
32
|
+
|
33
|
+
.summary th {
|
34
|
+
text-align: left;
|
35
|
+
}
|
36
|
+
|
37
|
+
.summary td {
|
38
|
+
text-align: right;
|
39
|
+
}
|
40
|
+
|
41
|
+
.hits, .visits, .size, .visitors, .count {
|
42
|
+
text-align: right;
|
43
|
+
font-weight: bold;
|
44
|
+
}
|
45
|
+
|
46
|
+
.referers .size {
|
47
|
+
width: 20%;
|
48
|
+
font-weight: bold;
|
49
|
+
}
|
50
|
+
|
51
|
+
.command-invocation, .log-structure, .performance {
|
52
|
+
width: 60%;
|
53
|
+
}
|
54
|
+
|
55
|
+
.command-invocation th, .log-structure th, .performance th {
|
56
|
+
text-align: left;
|
57
|
+
}
|
58
|
+
|
59
|
+
.log-structure td, .performance td {
|
60
|
+
text-align: right;
|
61
|
+
}
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "log_sense"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/log_sense
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line entry point: parses the options, parses the log file into an
# in-memory SQLite database, and either dumps that database to disk (sqlite
# output format) or crunches it and emits a report.

require 'log_sense.rb'

#
# Parse Command Line Arguments
#

# better be here... OptionsParser consumes ARGV
@command_line = ARGV.join(" ")
@options = LogSense::OptionsParser.parse ARGV
@input_file = @options[:input_file]
@output_file = @options[:output_file]

# Errors go to STDERR (STDOUT may carry the report) and exit with a
# non-zero status so that callers such as cron jobs can detect the failure.
unless @input_file
  STDERR.puts "Error: no input file specified."
  exit 1
end

unless File.exist? @input_file
  STDERR.puts "Error: input file '#{@input_file}' does not exist"
  exit 1
end

#
# Parse Log and Track Statistics
#

@started_at = Time.now

case @options[:input_format]
when 'apache'
  parser_klass = LogSense::ApacheLogParser
  cruncher_klass = LogSense::ApacheDataCruncher
when 'rails'
  parser_klass = LogSense::RailsLogParser
  cruncher_klass = LogSense::RailsDataCruncher
else
  # without this branch parser_klass stays nil and the script dies with a
  # NoMethodError a few lines below
  STDERR.puts "Error: input format '#{@options[:input_format]}' not understood"
  exit 1
end

@db = parser_klass.parse @input_file

if @options[:output_format] == "sqlite"
  # copy the in-memory database to a file
  ddb = SQLite3::Database.new(@output_file || "db.sqlite3")
  b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
  b.step(-1) #=> DONE
  b.finish
  ddb.close
else
  @data = cruncher_klass.crunch @db, @options
  @data = LogSense::IpLocator.geolocate @data

  @ended_at = Time.now
  @duration = @ended_at - @started_at

  @data = @data.merge({
    command: @command_line,
    log_file: @input_file,
    started_at: @started_at,
    ended_at: @ended_at,
    duration: @duration
  })

  #
  # Emit Output
  #
  # Emitter.emit writes the report itself (to the output file or to STDOUT)
  # and returns nil: wrapping it in `puts` only printed a spurious empty line.
  LogSense::Emitter.emit @data, @options
end
|
Binary file
|
@@ -0,0 +1,131 @@
|
|
1
|
+
module LogSense
  module ApacheDataCruncher
    #
    # take a sqlite3 database and analyze data
    #
    # @ variables are automatically put in the returned data
    #
    # db::      object responding to +execute(sql)+ and returning an array of
    #           rows (arrays); it is queried on a table named LogLine
    # options:: hash; the keys read here are :limit, :from_date, :to_date,
    #           :only_crawlers, :ignore_crawlers and :no_selfpolls
    #
    # Returns a Hash with one entry per instance variable set by this method
    # (key: variable name as a symbol, without the leading @).
    #
    def self.crunch db, options = { limit: 30 }
      # Fall back to the default of the signature when the caller passes a
      # hash without :limit (a nil limit produces invalid SQL); Integer()
      # also guarantees that only a number is interpolated in the queries.
      limit = Integer(options[:limit] || 30)

      first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
      last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"

      # make first and last day into dates or nil
      @first_day = first_day_s.empty? ? nil : Date.parse(first_day_s[0][0])
      @last_day = last_day_s.empty? ? nil : Date.parse(last_day_s[0][0])

      @total_days = 0
      if @first_day && @last_day
        @total_days = (@last_day - @first_day).to_i
      end

      @log_size = db.execute "SELECT count(datetime) from LogLine"
      @crawlers_size = db.execute "SELECT count(datetime) from LogLine where bot == 1"
      @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"

      @first_day_requested = options[:from_date]
      @last_day_requested = options[:to_date]

      @first_day_in_analysis = date_intersect options[:from_date], @first_day, :max
      @last_day_in_analysis = date_intersect options[:to_date], @last_day, :min

      @total_days_in_analysis = 0
      if @first_day_in_analysis && @last_day_in_analysis
        @total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
      end

      #
      # generate the where clause corresponding to the command line options to filter data
      #
      # NOTE(review): the dates are interpolated in the SQL text; presumably
      # they are Date objects built by the options parser -- confirm.
      # The trailing "true" keeps the clause valid when no option is set.
      filter = [
        (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
        (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
        (options[:only_crawlers] ? "bot == 1" : nil),
        (options[:ignore_crawlers] ? "bot == 0" : nil),
        (options[:no_selfpolls] ? "ip != '::1'" : nil),
        "true"
      ].compact.join " and "

      mega = 1024 * 1024
      giga = mega * 1024
      tera = giga * 1024

      # in alternative to sum(size)
      human_readable_size = <<-EOS
      CASE
      WHEN sum(size) < 1024 THEN sum(size) || ' B'
      WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB'
      WHEN sum(size) >= (#{mega}) AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB'
      WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB'
      WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
      END AS size
      EOS

      human_readable_day = <<-EOS
      case cast (strftime('%w', datetime) as integer)
      when 0 then 'Sunday'
      when 1 then 'Monday'
      when 2 then 'Tuesday'
      when 3 then 'Wednesday'
      when 4 then 'Thursday'
      when 5 then 'Friday'
      else 'Saturday'
      end as dow
      EOS

      @total_hits = db.execute "SELECT count(datetime) from LogLine where #{filter}"
      @total_unique_visitors = db.execute "SELECT count(distinct(unique_visitor)) from LogLine where #{filter}"
      @total_size = db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"

      @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
      @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
      @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{limit}"
      @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{limit}"
      @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{limit}"

      # SQL condition excluding the extensions a static website legitimately
      # serves (kept in an @ variable, hence also part of the returned data)
      @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
        "extension != '#{x}'"
      }.join " and "

      @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{limit}"
      @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"

      @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
      @by_day_3xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '3' and #{filter} group by date(datetime)"
      @by_day_2xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '2' and #{filter} group by date(datetime)"

      # One row per day: [day, 2xx, 3xx, 4xx].  A status class with no hits
      # on a given day counts 0, so that every row has the same columns
      # (concatenating and grouping the three result sets dropped the missing
      # classes and shifted the remaining counts to the wrong column).
      counts_by_class = [@by_day_2xx, @by_day_3xx, @by_day_4xx].map(&:to_h)
      days = counts_by_class.flat_map(&:keys).uniq.sort
      @statuses_by_day = days.map { |day|
        [day] + counts_by_class.map { |counts| counts.fetch(day, 0) }
      }

      @browsers = db.execute "SELECT browser, count(browser), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by browser order by count(browser) desc"
      @platforms = db.execute "SELECT platform, count(platform), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by platform order by count(platform) desc"
      @referers = db.execute "SELECT referer, count(referer), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by referer order by count(referer) desc limit #{limit}"

      @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{limit}"

      # NOTE(review): this query does not apply the command line filter -- confirm it is intended
      @streaks = db.execute "SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime"

      # collect all the @ variables in the returned hash
      # (instance_variable_get reads them without resorting to eval)
      instance_variables.each_with_object({}) do |variable, data|
        data[variable.to_s[1..-1].to_sym] = instance_variable_get(variable)
      end
    end

    #
    # Combine a requested date with a date found in the log.
    #
    # Returns [date1, date2].send(method) (method is :max or :min in this
    # module) when both dates are given, otherwise whichever is not nil
    # (or nil when both are missing).
    #
    # A bare `private` does not apply to methods defined with `def self.`:
    # this helper has always been publicly callable and is left so.
    #
    def self.date_intersect date1, date2, method
      if date1 && date2
        [date1, date2].send(method)
      elsif date1
        date1
      else
        date2
      end
    end
  end
end
|
131
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'apache_log/parser'
|
2
|
+
require 'sqlite3'
|
3
|
+
require 'browser'
|
4
|
+
|
5
|
+
module LogSense
  module ApacheLogParser
    #
    # parse an Apache log file and return a SQLite3 DB
    #
    # filename:: path of the log file; when nil the log is read from ARGF
    # options::  hash; :format is the log format passed to ApacheLog::Parser
    #            (defaults to 'combined')
    #
    # Returns an in-memory SQLite3::Database with one LogLine row per log
    # line successfully parsed.  Lines which cannot be parsed or stored are
    # reported on STDERR and skipped.
    #
    def self.parse filename, options = {}
      # iterate lazily: the lines are visited only once, there is no need to
      # load the whole log in memory
      lines = filename ? File.foreach(filename) : ARGF.each_line

      db = SQLite3::Database.new ":memory:"
      db.execute "CREATE TABLE IF NOT EXISTS LogLine(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        datetime TEXT,
        ip TEXT,
        user TEXT,
        unique_visitor TEXT,
        method TEXT,
        path TEXT,
        extension TEXT,
        status TEXT,
        size INTEGER,
        referer TEXT,
        user_agent TEXT,
        bot INTEGER,
        browser TEXT,
        browser_version TEXT,
        platform TEXT,
        platform_version TEXT)"

      ins = db.prepare('insert into LogLine (
        datetime,
        ip,
        user,
        unique_visitor,
        method,
        path,
        extension,
        status,
        size,
        referer,
        user_agent,
        bot,
        browser,
        browser_version,
        platform,
        platform_version)
        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)')

      parser = ApacheLog::Parser.new(options[:format] || 'combined')

      begin
        lines.each do |line|
          begin
            hash = parser.parse line

            ua = Browser.new(hash[:user_agent], accept_language: "en-us")
            ins.execute(
              hash[:datetime].iso8601,
              hash[:remote_host],
              hash[:user],
              # unique visitor: same day, same host and same user agent
              hash[:datetime].strftime("%Y-%m-%d") + " " + hash[:remote_host] + " " + hash[:user_agent],
              hash[:request][:method],
              hash[:request][:path],
              (hash[:request][:path] ? File.extname(hash[:request][:path]) : ""),
              hash[:status],
              hash[:size].to_i,
              hash[:referer],
              hash[:user_agent],
              ua.bot? ? 1 : 0,
              (ua.name || ""),
              (ua.version || ""),
              (ua.platform.name || ""),
              (ua.platform.version || "")
            )
          rescue
            # best effort: report the offending line and move on
            STDERR.puts "Apache Log parser error: could not parse #{line}"
          end
        end
      ensure
        # release the prepared statement, which was otherwise left open
        ins.close
      end

      db
    end

  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'terminal-table'
|
2
|
+
require 'erb'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
module LogSense
  module Emitter

    #
    # Emit Data
    #
    # Renders the template templates/<input_format>.<output_format>.erb
    # (located next to this file) and writes the result to
    # options[:output_file], if given, or to standard output otherwise.
    #
    # data::    values made available to the template as @data
    # options:: hash, available to the template as @options; the keys read
    #           here are :input_format (defaults to "apache"),
    #           :output_format (defaults to "html") and :output_file
    #
    # Returns nil.
    #
    def self.emit data = {}, options = {}
      @input_format = options[:input_format] || "apache"
      @output_format = options[:output_format] || "html"

      # for the ERB binding
      @data = data
      @options = options

      # determine the main template to read
      @template = File.join(File.dirname(__FILE__), "templates", "#{@input_format}.#{@output_format}.erb")
      erb_template = File.read @template

      output = ERB.new(erb_template).result(binding)

      if options[:output_file]
        # File.write closes the file also when writing raises
        File.write options[:output_file], output
      else
        puts output
      end

      # both branches have always returned nil: keep it explicit
      nil
    end

    # The helpers below are meant to be called from the templates.  A bare
    # `private` does not apply to methods defined with `def self.`: they have
    # always been publicly callable and are left so.

    #
    # Build a textual table preceded by an Org Mode "#+NAME:" line.
    #
    def self.output_txt_table name, headings, rows
      name = "#+NAME: #{name}"
      table = Terminal::Table.new headings: headings, rows: rows, style: { border_x: "-", border_i: "|" }
      name + "\n" + table.to_s
    end

    #
    # Render the partial templates/_<template>.html.erb; the entries of vars
    # are accessible by name in the partial (through an OpenStruct binding).
    #
    def self.render(template, vars)
      # a local variable: @template belongs to the main template set by emit
      # and must not be overwritten while that template is being rendered
      partial = File.join(File.dirname(__FILE__), "templates", "_#{template}.html.erb")
      erb_template = File.read partial
      ERB.new(erb_template).result(OpenStruct.new(vars).instance_eval { binding })
    end

  end
end
|