log_sense 1.5.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.org +27 -0
  3. data/Gemfile.lock +6 -4
  4. data/README.org +108 -34
  5. data/Rakefile +6 -6
  6. data/exe/log_sense +110 -39
  7. data/ip_locations/dbip-country-lite.sqlite3 +0 -0
  8. data/lib/log_sense/aggregator.rb +191 -0
  9. data/lib/log_sense/apache_aggregator.rb +122 -0
  10. data/lib/log_sense/apache_log_line_parser.rb +23 -21
  11. data/lib/log_sense/apache_log_parser.rb +15 -12
  12. data/lib/log_sense/apache_report_shaper.rb +309 -0
  13. data/lib/log_sense/emitter.rb +55 -553
  14. data/lib/log_sense/ip_locator.rb +24 -12
  15. data/lib/log_sense/options_checker.rb +24 -0
  16. data/lib/log_sense/options_parser.rb +81 -51
  17. data/lib/log_sense/rails_aggregator.rb +69 -0
  18. data/lib/log_sense/rails_log_parser.rb +82 -68
  19. data/lib/log_sense/rails_report_shaper.rb +183 -0
  20. data/lib/log_sense/report_shaper.rb +105 -0
  21. data/lib/log_sense/templates/_cdn_links.html.erb +11 -0
  22. data/lib/log_sense/templates/_command_invocation.html.erb +4 -0
  23. data/lib/log_sense/templates/_log_structure.html.erb +7 -1
  24. data/lib/log_sense/templates/_output_table.html.erb +6 -2
  25. data/lib/log_sense/templates/_rails.css.erb +7 -0
  26. data/lib/log_sense/templates/_summary.html.erb +9 -7
  27. data/lib/log_sense/templates/_summary.txt.erb +2 -2
  28. data/lib/log_sense/templates/{rails.html.erb → report_html.erb} +19 -37
  29. data/lib/log_sense/templates/{apache.txt.erb → report_txt.erb} +1 -1
  30. data/lib/log_sense/version.rb +1 -1
  31. data/lib/log_sense.rb +19 -9
  32. data/log_sense.gemspec +1 -1
  33. data/{apache-screenshot.png → screenshots/apache-screenshot.png} +0 -0
  34. data/screenshots/rails-screenshot.png +0 -0
  35. metadata +17 -11
  36. data/lib/log_sense/apache_data_cruncher.rb +0 -147
  37. data/lib/log_sense/rails_data_cruncher.rb +0 -141
  38. data/lib/log_sense/templates/apache.html.erb +0 -115
  39. data/lib/log_sense/templates/rails.txt.erb +0 -22
@@ -0,0 +1,122 @@
1
module LogSense
  #
  # Computes the Apache-specific aggregations over the LogLine table of a
  # SQLite3 database.
  #
  # The helpers used below but not defined in this class (filter,
  # human_readable_size, human_readable_day, aggregate_log_info,
  # aggregate_statuses, aggregate_ips, instance_vars_to_hash) are presumably
  # inherited from Aggregator -- confirm there.
  #
  class ApacheAggregator < Aggregator
    #
    # @param db      database handle; only `execute(sql)` is called on it here
    # @param options [Hash] only :limit (max rows returned by the "top N"
    #                queries) is read by this class
    #
    def initialize(db, options = { limit: 900 })
      # NOTE(review): table and field names are presumably read by the
      # generic queries of the superclass -- confirm in Aggregator
      @table = "LogLine"
      @date_field = "datetime"
      @url_field = "path"

      @db = db
      @options = options
    end

    #
    # take a sqlite3 database and analyze data
    #
    # @ variables are automatically put in the returned data
    # (do not introduce instance variables here unless they are meant to be
    # part of the result)
    #
    # @return whatever instance_vars_to_hash returns
    #
    def aggregate
      aggregate_log_info
      aggregate_statuses
      aggregate_ips

      #
      # Additional info specific to Apache Log Files
      #
      selfpolls = @db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
      @selfpolls_size = selfpolls[0][0]

      crawlers = @db.execute "SELECT count(datetime) from LogLine where bot == 1"
      @crawlers_size = crawlers[0][0]

      total = @db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"
      @total_size = total[0][0]

      @daily_distribution = @db.execute <<~SQL
        SELECT date(datetime),
               #{human_readable_day},
               count(datetime),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by date(datetime)
      SQL

      @time_distribution = @db.execute <<~SQL
        SELECT strftime('%H', datetime),
               count(datetime),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by strftime('%H', datetime)
      SQL

      # SQL fragments: kind of resource and outcome of the request
      html = "(extension like '.htm%')"
      non_html = "(extension not like '.htm%')"
      gs = "(status like '2%' or status like '3%')"
      bs = "(status like '4%' or status like '5%')"

      @most_requested_pages = @db.execute resource_query(html, gs)
      @most_requested_resources = @db.execute resource_query(non_html, gs)
      @missed_pages = @db.execute resource_query(html, bs)
      @missed_resources = @db.execute resource_query(non_html, bs)

      @missed_pages_by_ip = @db.execute <<~SQL
        SELECT ip, path, status
        from LogLine
        where #{bs} and #{html} and #{filter}
        limit #{@options[:limit]}
      SQL

      # Restricted to non-HTML resources: without the restriction this query
      # also returned the HTML pages already reported in @missed_pages_by_ip
      @missed_resources_by_ip = @db.execute <<~SQL
        SELECT ip, path, status
        from LogLine
        where #{bs} and #{non_html} and #{filter}
        limit #{@options[:limit]}
      SQL

      @browsers = @db.execute <<~SQL
        SELECT browser,
               count(browser),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by browser
        order by count(browser) desc
      SQL

      @platforms = @db.execute <<~SQL
        SELECT platform,
               count(platform),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by platform
        order by count(platform) desc
      SQL

      @combined_platforms = @db.execute <<~SQL
        SELECT browser,
               platform,
               ip,
               count(datetime),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by browser, platform, ip
        order by count(datetime) desc
        limit #{@options[:limit]}
      SQL

      @referers = @db.execute <<~SQL
        SELECT referer,
               count(referer),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by referer
        order by count(referer) desc
        limit #{@options[:limit]}
      SQL

      instance_vars_to_hash
    end

    private

    #
    # Builds the "top paths" query: path, hits, distinct visitors, size and
    # status (five columns), most requested paths first.
    #
    # @param type   [String] SQL condition selecting the kind of resource
    # @param result [String] SQL condition selecting the status class
    # @return [String] the SQL text of the query
    #
    def resource_query(type, result)
      <<~SQL
        SELECT path,
               count(path),
               count(distinct(unique_visitor)),
               #{human_readable_size},
               status
        from LogLine
        where #{result} and #{type} and #{filter}
        group by path
        order by count(path) desc
        limit #{@options[:limit]}
      SQL
    end
  end
end
@@ -1,24 +1,25 @@
1
1
  module LogSense
2
+ # parses a log line and returns the resulting MatchData (raises if the line does not match)
3
+ # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
4
+ #
5
+ # %h: IP
6
+ # %l: ident or -
7
+ # %u: userid or -
8
+ # %t: [10/Oct/2000:13:55:36 -0700]
9
+ # day = 2*digit
10
+ # month = 3*letter
11
+ # year = 4*digit
12
+ # hour = 2*digit
13
+ # minute = 2*digit
14
+ # second = 2*digit
15
+ # zone = (`+' | `-') 4*digit
16
+ # %r: GET /apache_pb.gif HTTP/1.0
17
+ # %{User-agent}i: the User-Agent request header, enclosed in double quotes
18
+ #
19
+ # Example
20
+ # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
21
+ #
2
22
  class ApacheLogLineParser
3
- # parses a query and makes it into an expression which can be evaluated
4
- # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
5
- #
6
- # %h: IP
7
- # %l: ident or -
8
- # %u: userid or -
9
- # %t: [10/Oct/2000:13:55:36 -0700]
10
- # day = 2*digit
11
- # month = 3*letter
12
- # year = 4*digit
13
- # hour = 2*digit
14
- # minute = 2*digit
15
- # second = 2*digit
16
- # zone = (`+' | `-') 4*digit
17
- # %r: GET /apache_pb.gif HTTP/1.0
18
- # %{User-agent}: "
19
- #
20
- # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
21
-
22
23
  DAY = /[0-9]{2}/
23
24
  MONTH = /[A-Za-z]{3}/
24
25
  YEAR = /[0-9]{4}/
@@ -48,8 +49,9 @@ module LogSense
48
49
  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
49
50
  end
50
51
 
51
- def parse line
52
- hash = @format.match(line) || raise("Apache LogLine Parser Error: Could not parse #{line}")
52
def parse(line)
  # Match the line against the log format built in the constructor
  match = @format.match(line)
  return match if match

  # No match: report the offending line to the caller
  raise "Apache LogLine Parser Error: Could not parse #{line}"
end
54
56
  end
55
57
  end
@@ -1,14 +1,14 @@
1
- require 'sqlite3'
2
- require 'browser'
1
+ require "sqlite3"
2
+ require "browser"
3
+ require "log_sense/apache_log_line_parser"
3
4
 
4
5
  module LogSense
5
- module ApacheLogParser
6
- #
7
- # parse an Apache log file and return a SQLite3 DB
8
- #
9
-
10
- def self.parse(streams, options = {})
11
- db = SQLite3::Database.new ':memory:'
6
+ #
7
+ # parse an Apache log file and return a SQLite3 DB
8
+ #
9
+ class ApacheLogParser
10
+ def parse(streams, options = {})
11
+ db = SQLite3::Database.new ":memory:"
12
12
 
13
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
14
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -31,7 +31,7 @@ module LogSense
31
31
  source_file TEXT,
32
32
  line_number INTEGER
33
33
  )"
34
-
34
+
35
35
  ins = db.prepare("insert into LogLine (
36
36
  datetime,
37
37
  ip,
@@ -82,7 +82,7 @@ module LogSense
82
82
  line_number
83
83
  )
84
84
  rescue StandardError => e
85
- $stderr.puts e.message
85
+ warn e.message
86
86
  end
87
87
  end
88
88
  end
@@ -90,8 +90,11 @@ module LogSense
90
90
  db
91
91
  end
92
92
 
93
- def self.unique_visitor_id hash
93
+ private
94
+
95
# Builds the identifier of a visitor from the :date, :ip and :user_agent
# entries of a parsed log line, joined by single spaces.
def unique_visitor_id(hash)
  date = hash[:date]
  ip = hash[:ip]
  user_agent = hash[:user_agent]

  "#{date} #{ip} #{user_agent}"
end
96
98
  end
97
99
  end
100
+
@@ -0,0 +1,309 @@
1
module LogSense
  #
  # Describes the reports built from the aggregated data of an Apache log.
  #
  # WORDS_SEPARATOR, ip_per_hour_report_spec, ips_per_hour,
  # session_report_spec and ips_detailed are not defined in this class;
  # presumably they are inherited from ReportShaper -- confirm there.
  #
  class ApacheReportShaper < ReportShaper
    #
    # Specification of the reports to generate
    # Array of hashes with the following information:
    # - title: report_title
    #   header: header of tabular data
    #   rows: data to show
    #   column_alignment: specification of column alignments (works for txt reports)
    #   vega_spec: specifications for Vega output
    #   datatable_options: specific options for datatable
    #
    # @param data [Hash] aggregated data, indexed by symbols such as
    #             :daily_distribution, :missed_pages, :ips, ...
    # @return [Array<Hash>] one specification per report
    #
    def shape(data)
      [
        {
          title: "Daily Distribution",
          header: %w[Day DOW Hits Visits Size],
          column_alignment: %i[left left right right right],
          rows: data[:daily_distribution],
          vega_spec: {
            layer: [
              {
                mark: {
                  type: "line",
                  point: {
                    filled: false,
                    fill: "white"
                  }
                },
                encoding: {
                  y: { field: "Hits", type: "quantitative" }
                }
              },
              {
                mark: {
                  type: "text",
                  color: "#3E5772",
                  align: "middle",
                  baseline: "top",
                  dx: -10,
                  yOffset: -15
                },
                encoding: {
                  text: { field: "Hits", type: "quantitative" },
                  y: { field: "Hits", type: "quantitative" }
                }
              },
              {
                mark: {
                  type: "line",
                  color: "#A52A2A",
                  point: {
                    color: "#A52A2A",
                    filled: false,
                    fill: "white"
                  }
                },
                encoding: {
                  y: { field: "Visits", type: "quantitative" }
                }
              },
              {
                mark: {
                  type: "text",
                  color: "#A52A2A",
                  align: "middle",
                  baseline: "top",
                  dx: -10,
                  yOffset: -15
                },
                encoding: {
                  text: { field: "Visits", type: "quantitative" },
                  y: { field: "Visits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Day", type: "temporal" }
            }
          }
        },
        {
          title: "Time Distribution",
          header: %w[Hour Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:time_distribution],
          vega_spec: {
            layer: [
              {
                mark: "bar"
              },
              {
                mark: {
                  type: "text",
                  align: "middle",
                  baseline: "top",
                  dx: -10,
                  yOffset: -15
                },
                encoding: {
                  text: { field: "Hits", type: "quantitative" },
                  y: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Hour", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "20_ and 30_ on HTML pages",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:most_requested_pages],
          datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 }, { width: \"15%\", targets: [1, 2, 3, 4] }], dataRender: true"
        },
        {
          title: "20_ and 30_ on other resources",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:most_requested_resources],
          datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 }, { width: \"15%\", targets: [1, 2, 3, 4] }], dataRender: true"
        },
        {
          # Rows come from the same five-column query used for the
          # "20_ and 30_" reports (path, hits, visits, size, status): the
          # header has to list Size too, or Status ends up labelling sizes
          title: "40_ and 50_x on HTML pages",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:missed_pages],
          datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 }, { width: \"15%\", targets: [1, 2, 3, 4] }], dataRender: true"
        },
        {
          # Five columns, as in the report above
          title: "40_ and 50_ on other resources",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:missed_resources],
          datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 }, { width: \"15%\", targets: [1, 2, 3, 4] }], dataRender: true"
        },
        {
          title: "40_ and 50_x on HTML pages by IP",
          header: %w[IP Hits Paths],
          column_alignment: %i[left right left],
          rows: paths_grouped_by_ip(data[:missed_pages_by_ip])
        },
        {
          title: "40_ and 50_ on other resources by IP",
          header: %w[IP Hits Paths],
          column_alignment: %i[left right left],
          rows: paths_grouped_by_ip(data[:missed_resources_by_ip])
        },
        {
          title: "Statuses",
          header: %w[Status Count],
          column_alignment: %i[left right],
          rows: data[:statuses],
          vega_spec: {
            mark: "bar",
            encoding: {
              x: { field: "Status", type: "nominal" },
              y: { field: "Count", type: "quantitative" }
            }
          }
        },
        {
          title: "Daily Statuses",
          header: %w[Date S_2xx S_3xx S_4xx S_5xx],
          column_alignment: %i[left right right right right],
          rows: data[:statuses_by_day],
          vega_spec: {
            transform: [{ fold: %w[S_2xx S_3xx S_4xx S_5xx] }],
            mark: "bar",
            encoding: {
              x: {
                field: "Date",
                type: "ordinal",
                timeUnit: "day"
              },
              y: {
                aggregate: "sum",
                field: "value",
                type: "quantitative"
              },
              color: {
                field: "key",
                type: "nominal",
                scale: {
                  # one entry per folded column: S_5xx was folded but had
                  # no entry in the color scale
                  domain: %w[S_2xx S_3xx S_4xx S_5xx],
                  range: ["#228b22", "#ff8c00", "#a52a2a", "#800080"]
                }
              }
            }
          }
        },
        {
          title: "Browsers",
          header: %w[Browser Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:browsers],
          vega_spec: {
            layer: [
              { mark: "bar" },
              {
                mark: {
                  type: "text",
                  align: "middle",
                  baseline: "top",
                  dx: -10,
                  yOffset: -15
                },
                encoding: {
                  text: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Browser", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "Platforms",
          header: %w[Platform Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:platforms],
          vega_spec: {
            layer: [
              { mark: "bar" },
              {
                mark: {
                  type: "text",
                  align: "middle",
                  baseline: "top",
                  dx: -10,
                  yOffset: -15
                },
                encoding: {
                  text: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Platform", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "IPs",
          header: %w[IP Hits Visits Size Country],
          column_alignment: %i[left right right right left],
          rows: data[:ips]
        },
        {
          title: "Countries",
          header: ["Country", "Hits", "Visits", "IPs", "IP List"],
          column_alignment: %i[left right right right left],
          # each value is a list of rows; columns 0, 1 and 2 of a row are
          # used as IP, hits and visits respectively
          rows: data[:countries]&.map { |country, ip_rows|
            [
              country,
              ip_rows.map { |x| x[1] }.inject(&:+),
              ip_rows.map { |x| x[2] }.inject(&:+),
              ip_rows.map { |x| x[0] }.uniq.size,
              ip_rows.map { |x| x[0] }.join(WORDS_SEPARATOR)
            ]
          }&.sort { |x, y| y[3] <=> x[3] }
        },
        ip_per_hour_report_spec(ips_per_hour(data[:ips_per_hour])),
        {
          title: "Combined Platform Data",
          header: %w[Browser OS IP Hits Size],
          column_alignment: %i[left left left right right],
          col: "small-12 cell",
          rows: data[:combined_platforms]
        },
        {
          title: "Referers",
          header: %w[Referers Hits Visits Size],
          column_alignment: %i[left right right right],
          datatable_options: "columnDefs: [{ width: \"50%\", targets: 0 } ], dataRender: true",
          rows: data[:referers],
          col: "small-12 cell"
        },
        session_report_spec(ips_detailed(data[:ips_per_day_detailed]))
      ]
    end

    private

    #
    # Groups [ip, path, status] rows by IP.
    #
    # Input is something along the line of:
    # [["66.249.79.93", "/adolfo/notes/calendar/2014/11/16.html", "404"],
    #  ["66.249.79.93", "/adolfo/website-specification/generate-xml-sitemap.org.html", "404"]]
    #
    # @param rows [Array<Array>, nil] rows to group
    # @return [Array<Array>, nil] [ip, number of rows, paths joined by
    #         WORDS_SEPARATOR], IPs with more rows first; nil if rows is nil
    #
    def paths_grouped_by_ip(rows)
      return nil if rows.nil?

      grouped = rows.group_by { |row| row[0] }.map do |ip, ip_rows|
        [ip, ip_rows.size, ip_rows.map { |row| row[1] }.join(WORDS_SEPARATOR)]
      end
      grouped.sort { |x, y| y[1] <=> x[1] }
    end
  end
end