log_sense 1.5.2 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.org +27 -0
  3. data/Gemfile.lock +6 -4
  4. data/README.org +108 -34
  5. data/Rakefile +6 -6
  6. data/exe/log_sense +110 -39
  7. data/ip_locations/dbip-country-lite.sqlite3 +0 -0
  8. data/lib/log_sense/aggregator.rb +191 -0
  9. data/lib/log_sense/apache_aggregator.rb +122 -0
  10. data/lib/log_sense/apache_log_line_parser.rb +23 -21
  11. data/lib/log_sense/apache_log_parser.rb +15 -12
  12. data/lib/log_sense/apache_report_shaper.rb +309 -0
  13. data/lib/log_sense/emitter.rb +55 -553
  14. data/lib/log_sense/ip_locator.rb +24 -12
  15. data/lib/log_sense/options_checker.rb +24 -0
  16. data/lib/log_sense/options_parser.rb +81 -51
  17. data/lib/log_sense/rails_aggregator.rb +69 -0
  18. data/lib/log_sense/rails_log_parser.rb +82 -68
  19. data/lib/log_sense/rails_report_shaper.rb +183 -0
  20. data/lib/log_sense/report_shaper.rb +105 -0
  21. data/lib/log_sense/templates/_cdn_links.html.erb +11 -0
  22. data/lib/log_sense/templates/_command_invocation.html.erb +4 -0
  23. data/lib/log_sense/templates/_log_structure.html.erb +7 -1
  24. data/lib/log_sense/templates/_output_table.html.erb +6 -2
  25. data/lib/log_sense/templates/_rails.css.erb +7 -0
  26. data/lib/log_sense/templates/_summary.html.erb +9 -7
  27. data/lib/log_sense/templates/_summary.txt.erb +2 -2
  28. data/lib/log_sense/templates/{rails.html.erb → report_html.erb} +19 -37
  29. data/lib/log_sense/templates/{apache.txt.erb → report_txt.erb} +1 -1
  30. data/lib/log_sense/version.rb +1 -1
  31. data/lib/log_sense.rb +19 -9
  32. data/log_sense.gemspec +1 -1
  33. data/{apache-screenshot.png → screenshots/apache-screenshot.png} +0 -0
  34. data/screenshots/rails-screenshot.png +0 -0
  35. metadata +17 -11
  36. data/lib/log_sense/apache_data_cruncher.rb +0 -147
  37. data/lib/log_sense/rails_data_cruncher.rb +0 -141
  38. data/lib/log_sense/templates/apache.html.erb +0 -115
  39. data/lib/log_sense/templates/rails.txt.erb +0 -22
@@ -0,0 +1,122 @@
module LogSense
  #
  # Aggregates the data of an Apache log file stored in a SQLite3 database.
  #
  # The class relies on helpers that are not defined here and are expected
  # to come from Aggregator: aggregate_log_info, aggregate_statuses,
  # aggregate_ips, filter, human_readable_size, human_readable_day and
  # instance_vars_to_hash.
  #
  class ApacheAggregator < Aggregator
    #
    # db:: database handle; it must respond to +execute+ and contain the
    #      LogLine table
    # options:: hash of options; +:limit+ is interpolated in the LIMIT
    #           clause of the queries returning lists of paths, platforms
    #           and referers
    #
    def initialize(db, options = { limit: 900 })
      # NOTE(review): @table, @date_field and @url_field are not read in
      # this class; presumably they parametrize the queries built by
      # Aggregator -- confirm against the superclass
      @table = "LogLine"
      @date_field = "datetime"
      @url_field = "path"

      @db = db
      @options = options
    end

    #
    # take a sqlite3 database and analyze data
    #
    # @ variables are automatically put in the returned data
    # (this is why intermediate values are kept in local variables)
    #
    # Returns whatever instance_vars_to_hash returns.
    #
    def aggregate
      aggregate_log_info
      aggregate_statuses
      aggregate_ips

      #
      # Additional info specific to Apache Log Files
      #
      selfpolls = @db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
      @selfpolls_size = selfpolls[0][0]

      crawlers = @db.execute "SELECT count(datetime) from LogLine where bot == 1"
      @crawlers_size = crawlers[0][0]

      total = @db.execute "SELECT #{human_readable_size} from LogLine where #{filter}"
      @total_size = total[0][0]

      @daily_distribution = @db.execute <<~SQL
        SELECT date(datetime),
               #{human_readable_day},
               count(datetime),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by date(datetime)
      SQL

      @time_distribution = @db.execute <<~SQL
        SELECT strftime('%H', datetime),
               count(datetime),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by strftime('%H', datetime)
      SQL

      # SQL fragments used to split requests by type of resource and by
      # outcome (gs: 2xx and 3xx statuses; bs: 4xx and 5xx statuses)
      html = "(extension like '.htm%')"
      non_html = "(extension not like '.htm%')"
      gs = "(status like '2%' or status like '3%')"
      bs = "(status like '4%' or status like '5%')"

      @most_requested_pages = @db.execute resource_query(html, gs)
      @most_requested_resources = @db.execute resource_query(non_html, gs)
      @missed_pages = @db.execute resource_query(html, bs)
      @missed_resources = @db.execute resource_query(non_html, bs)

      @missed_pages_by_ip = @db.execute <<~SQL
        SELECT ip, path, status
        from LogLine
        where #{bs} and #{html} and #{filter}
        limit #{@options[:limit]}
      SQL

      # restricted to non HTML resources, like @missed_resources, so that
      # pages listed in @missed_pages_by_ip are not reported twice
      @missed_resources_by_ip = @db.execute <<~SQL
        SELECT ip, path, status
        from LogLine
        where #{bs} and #{non_html} and #{filter}
        limit #{@options[:limit]}
      SQL

      @browsers = @db.execute <<~SQL
        SELECT browser,
               count(browser),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by browser
        order by count(browser) desc
      SQL

      @platforms = @db.execute <<~SQL
        SELECT platform,
               count(platform),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by platform
        order by count(platform) desc
      SQL

      @combined_platforms = @db.execute <<~SQL
        SELECT browser,
               platform,
               ip,
               count(datetime),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by browser, platform, ip
        order by count(datetime) desc
        limit #{@options[:limit]}
      SQL

      @referers = @db.execute <<~SQL
        SELECT referer,
               count(referer),
               count(distinct(unique_visitor)),
               #{human_readable_size}
        from LogLine
        where #{filter}
        group by referer
        order by count(referer) desc
        limit #{@options[:limit]}
      SQL

      instance_vars_to_hash
    end

    private

    #
    # Build the query listing the most requested paths.
    #
    # type:: SQL condition selecting the kind of resource (e.g., HTML pages)
    # result:: SQL condition selecting the statuses of interest
    #
    # Each row contains: path, hits, distinct visitors, size, status.
    #
    def resource_query(type, result)
      <<~SQL
        SELECT path,
               count(path),
               count(distinct(unique_visitor)),
               #{human_readable_size},
               status
        from LogLine
        where #{result} and #{type} and #{filter}
        group by path
        order by count(path) desc
        limit #{@options[:limit]}
      SQL
    end
  end
end
@@ -1,24 +1,25 @@
1
1
  module LogSense
2
+ # parses a log line and returns a hash
3
+ # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
4
+ #
5
+ # %h: IP
6
+ # %l: ident or -
7
+ # %u: userid or -
8
+ # %t: [10/Oct/2000:13:55:36 -0700]
9
+ # day = 2*digit
10
+ # month = 3*letter
11
+ # year = 4*digit
12
+ # hour = 2*digit
13
+ # minute = 2*digit
14
+ # second = 2*digit
15
+ # zone = (`+' | `-') 4*digit
16
+ # %r: GET /apache_pb.gif HTTP/1.0
17
+ # %{User-agent}: "
18
+ #
19
+ # Example
20
+ # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
21
+ #
2
22
  class ApacheLogLineParser
3
- # parses a query and makes it into an expression which can be evaluated
4
- # LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
5
- #
6
- # %h: IP
7
- # %l: ident or -
8
- # %u: userid or -
9
- # %t: [10/Oct/2000:13:55:36 -0700]
10
- # day = 2*digit
11
- # month = 3*letter
12
- # year = 4*digit
13
- # hour = 2*digit
14
- # minute = 2*digit
15
- # second = 2*digit
16
- # zone = (`+' | `-') 4*digit
17
- # %r: GET /apache_pb.gif HTTP/1.0
18
- # %{User-agent}: "
19
- #
20
- # 116.179.32.16 - - [19/Dec/2021:22:35:11 +0100] "GET / HTTP/1.1" 200 135 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
21
-
22
23
  DAY = /[0-9]{2}/
23
24
  MONTH = /[A-Za-z]{3}/
24
25
  YEAR = /[0-9]{4}/
@@ -48,8 +49,9 @@ module LogSense
48
49
  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
49
50
  end
50
51
 
51
- def parse line
52
- hash = @format.match(line) || raise("Apache LogLine Parser Error: Could not parse #{line}")
52
+ def parse(line)
53
+ @format.match(line) ||
54
+ raise("Apache LogLine Parser Error: Could not parse #{line}")
53
55
  end
54
56
  end
55
57
  end
@@ -1,14 +1,14 @@
1
- require 'sqlite3'
2
- require 'browser'
1
+ require "sqlite3"
2
+ require "browser"
3
+ require "log_sense/apache_log_line_parser"
3
4
 
4
5
  module LogSense
5
- module ApacheLogParser
6
- #
7
- # parse an Apache log file and return a SQLite3 DB
8
- #
9
-
10
- def self.parse(streams, options = {})
11
- db = SQLite3::Database.new ':memory:'
6
+ #
7
+ # parse an Apache log file and return a SQLite3 DB
8
+ #
9
+ class ApacheLogParser
10
+ def parse(streams, options = {})
11
+ db = SQLite3::Database.new ":memory:"
12
12
 
13
13
  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
14
14
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -31,7 +31,7 @@ module LogSense
31
31
  source_file TEXT,
32
32
  line_number INTEGER
33
33
  )"
34
-
34
+
35
35
  ins = db.prepare("insert into LogLine (
36
36
  datetime,
37
37
  ip,
@@ -82,7 +82,7 @@ module LogSense
82
82
  line_number
83
83
  )
84
84
  rescue StandardError => e
85
- $stderr.puts e.message
85
+ warn e.message
86
86
  end
87
87
  end
88
88
  end
@@ -90,8 +90,11 @@ module LogSense
90
90
  db
91
91
  end
92
92
 
93
- def self.unique_visitor_id hash
93
+ private
94
+
95
+ def unique_visitor_id hash
94
96
  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
95
97
  end
96
98
  end
97
99
  end
100
+
@@ -0,0 +1,309 @@
module LogSense
  #
  # Shapes the data aggregated from an Apache log file into the list of
  # reports to emit.
  #
  # WORDS_SEPARATOR, ip_per_hour_report_spec, ips_per_hour,
  # session_report_spec and ips_detailed are not defined here and are
  # expected to come from ReportShaper.
  #
  class ApacheReportShaper < ReportShaper
    #
    # Specification of the reports to generate
    # Array of hashes with the following information:
    # - title: report_title
    #   header: header of tabular data
    #   rows: data to show
    #   column_alignment: specification of column alignments (works for txt reports)
    #   vega_spec: specifications for Vega output
    #   datatable_options: specific options for datatable
    #   col: NOTE(review): presumably the CSS classes of the cell holding
    #        the report in the HTML output -- confirm against the templates
    #
    # data:: hash of aggregated data, accessed with symbol keys
    #        (e.g., :daily_distribution, :browsers, :referers)
    #
    def shape(data)
      # rows of the reports listing paths have five columns:
      # path, hits, visits, size, and status
      path_table_options = "columnDefs: [{ width: \"40%\", targets: 0 }, { width: \"15%\", targets: [1, 2, 3, 4] }], dataRender: true"

      [
        {
          title: "Daily Distribution",
          header: %w[Day DOW Hits Visits Size],
          column_alignment: %i[left left right right right],
          rows: data[:daily_distribution],
          vega_spec: {
            layer: [
              {
                mark: {
                  type: "line",
                  point: { filled: false, fill: "white" }
                },
                encoding: {
                  y: { field: "Hits", type: "quantitative" }
                }
              },
              {
                mark: label_mark("#3E5772"),
                encoding: {
                  text: { field: "Hits", type: "quantitative" },
                  y: { field: "Hits", type: "quantitative" }
                }
              },
              {
                mark: {
                  type: "line",
                  color: "#A52A2A",
                  point: { color: "#A52A2A", filled: false, fill: "white" }
                },
                encoding: {
                  y: { field: "Visits", type: "quantitative" }
                }
              },
              {
                mark: label_mark("#A52A2A"),
                encoding: {
                  text: { field: "Visits", type: "quantitative" },
                  y: { field: "Visits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Day", type: "temporal" }
            }
          }
        },
        {
          title: "Time Distribution",
          header: %w[Hour Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:time_distribution],
          vega_spec: {
            layer: [
              { mark: "bar" },
              {
                mark: label_mark,
                encoding: {
                  text: { field: "Hits", type: "quantitative" },
                  y: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Hour", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "20_ and 30_ on HTML pages",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:most_requested_pages],
          datatable_options: path_table_options
        },
        {
          title: "20_ and 30_ on other resources",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:most_requested_resources],
          datatable_options: path_table_options
        },
        {
          title: "40_ and 50_x on HTML pages",
          # same five columns as the reports on 20_ and 30_: the rows are
          # built by the same query and include the size
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:missed_pages],
          datatable_options: path_table_options
        },
        {
          title: "40_ and 50_ on other resources",
          header: %w[Path Hits Visits Size Status],
          column_alignment: %i[left right right right right],
          rows: data[:missed_resources],
          datatable_options: path_table_options
        },
        {
          title: "40_ and 50_x on HTML pages by IP",
          header: %w[IP Hits Paths],
          column_alignment: %i[left right left],
          rows: hits_and_paths_by_ip(data[:missed_pages_by_ip])
        },
        {
          title: "40_ and 50_ on other resources by IP",
          header: %w[IP Hits Paths],
          column_alignment: %i[left right left],
          rows: hits_and_paths_by_ip(data[:missed_resources_by_ip])
        },
        {
          title: "Statuses",
          header: %w[Status Count],
          column_alignment: %i[left right],
          rows: data[:statuses],
          vega_spec: {
            mark: "bar",
            encoding: {
              x: { field: "Status", type: "nominal" },
              y: { field: "Count", type: "quantitative" }
            }
          }
        },
        {
          title: "Daily Statuses",
          header: %w[Date S_2xx S_3xx S_4xx S_5xx],
          column_alignment: %i[left right right right right],
          rows: data[:statuses_by_day],
          vega_spec: {
            transform: [{ fold: %w[S_2xx S_3xx S_4xx S_5xx] }],
            mark: "bar",
            encoding: {
              x: {
                field: "Date",
                type: "ordinal",
                timeUnit: "day"
              },
              y: {
                aggregate: "sum",
                field: "value",
                type: "quantitative"
              },
              color: {
                field: "key",
                type: "nominal",
                scale: {
                  # one color for each of the folded columns
                  domain: %w[S_2xx S_3xx S_4xx S_5xx],
                  range: ["#228b22", "#ff8c00", "#a52a2a", "#800080"]
                }
              }
            }
          }
        },
        {
          title: "Browsers",
          header: %w[Browser Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:browsers],
          vega_spec: {
            layer: [
              { mark: "bar" },
              {
                mark: label_mark,
                encoding: {
                  text: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Browser", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "Platforms",
          header: %w[Platform Hits Visits Size],
          column_alignment: %i[left right right right],
          rows: data[:platforms],
          vega_spec: {
            layer: [
              { mark: "bar" },
              {
                mark: label_mark,
                encoding: {
                  text: { field: "Hits", type: "quantitative" }
                }
              }
            ],
            encoding: {
              x: { field: "Platform", type: "nominal" },
              y: { field: "Hits", type: "quantitative" }
            }
          }
        },
        {
          title: "IPs",
          header: %w[IP Hits Visits Size Country],
          column_alignment: %i[left right right right left],
          rows: data[:ips]
        },
        {
          title: "Countries",
          header: ["Country", "Hits", "Visits", "IPs", "IP List"],
          column_alignment: %i[left right right right left],
          rows: country_rows(data[:countries])
        },
        ip_per_hour_report_spec(ips_per_hour(data[:ips_per_hour])),
        {
          title: "Combined Platform Data",
          header: %w[Browser OS IP Hits Size],
          column_alignment: %i[left left left right right],
          col: "small-12 cell",
          rows: data[:combined_platforms]
        },
        {
          title: "Referers",
          header: %w[Referers Hits Visits Size],
          column_alignment: %i[left right right right],
          datatable_options: "columnDefs: [{ width: \"50%\", targets: 0 } ], dataRender: true",
          rows: data[:referers],
          col: "small-12 cell"
        },
        session_report_spec(ips_detailed(data[:ips_per_day_detailed]))
      ]
    end

    private

    #
    # Vega mark for the text labels drawn on top of a chart.
    #
    # color:: optional color of the label; no color is set when nil
    #
    def label_mark(color = nil)
      mark = {
        type: "text",
        align: "middle",
        baseline: "top",
        dx: -10,
        yOffset: -15
      }
      mark[:color] = color if color
      mark
    end

    #
    # Group [ip, path, status] rows by IP.
    #
    # Value is something along the lines of:
    # [["66.249.79.93", "/adolfo/notes/calendar/2014/11/16.html", "404"],
    #  ["66.249.79.93", "/adolfo/website-specification/generate-xml-sitemap.org.html", "404"]]
    #
    # Returns rows of the form [ip, number of hits, paths joined by
    # WORDS_SEPARATOR], sorted by decreasing number of hits, or nil if
    # rows is nil.
    #
    def hits_and_paths_by_ip(rows)
      grouped = rows&.group_by { |row| row[0] }&.map do |ip, requests|
        [
          ip,
          requests.size,
          requests.map { |request| request[1] }.join(WORDS_SEPARATOR)
        ]
      end
      grouped&.sort { |x, y| y[1] <=> x[1] }
    end

    #
    # Build the rows of the report by country.
    #
    # countries:: enumerable yielding a country and the list of its
    #             entries; the first three elements of each entry are
    #             used as IP, hits, and visits (in this order)
    #
    # Returns rows of the form [country, hits, visits, number of distinct
    # IPs, IPs joined by WORDS_SEPARATOR], sorted by decreasing number of
    # distinct IPs, or nil if countries is nil.
    #
    def country_rows(countries)
      shaped = countries&.map do |country, entries|
        ips = entries.map { |entry| entry[0] }
        [
          country,
          entries.map { |entry| entry[1] }.inject(&:+),
          entries.map { |entry| entry[2] }.inject(&:+),
          ips.uniq.size,
          ips.join(WORDS_SEPARATOR)
        ]
      end
      shaped&.sort { |x, y| y[3] <=> x[3] }
    end
  end
end