log_sense 1.3.5 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30e8194103003ca9861272072bbf9ef199d7d80b67a0b73fb38d510f23adacee
4
- data.tar.gz: 00b36829dd41b27e79cd6a41cacb412479631c0be9a9e2b79d585f81e0c7efa8
3
+ metadata.gz: 266e20972553f6d409814398dab832334a4c01bfa12e90c74acfdc75ee0b7c8d
4
+ data.tar.gz: 228e6bdc2d931e5190d82fc5ba66660ff6f0de0277a876746154de81a1ffe4e2
5
5
  SHA512:
6
- metadata.gz: 41b392e7d5ec01052dbb645d50b6dc6736a82c04ea231ac33f3fa2c3679cc27a959164a8f5f9524f1604f1b495c032978a2e4e14ca01a706448fc6a8d7556185
7
- data.tar.gz: fea27d4e0765fec9b090101a3efec3eb5ebd162af1c7389162d34c876754980b5dbccb22141b3aa4a0ec3eeb44bfd3936e17cc5463dc93148c3718bb51a1dcec
6
+ metadata.gz: f1454d78cfec258ff3bc69359be29178ebab4cf7ffd2869d736c29f2cffd6efe209f65be59298864bf94de30bd022a3397c91446b043c49e704b6d38ced59357
7
+ data.tar.gz: aa7239af4bb17270a23d9931194859b01a8ff50ebb9cc3c3ed37aeac1702aef413051b0381c57208dcffedd31ccbff5bd0a9485c0fe18947f540cda9f4463acd
data/CHANGELOG.org CHANGED
@@ -2,6 +2,25 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.4.0
6
+
7
+ - [User] The Apache Log report now organizes page requests in four
8
+ tables:
9
+ - success on HTML pages
10
+ - success on other resources
11
+ - failures on HTML pages
12
+ - failures on other resources
13
+ - [User] Increased the default limit of pages in reports to 900
14
+ - [User] The return status is now included in the page and resources
15
+ reports
16
+ - [User] The "Attack" table has been removed, since the data can be
17
+ gotten from the previous tables
18
+ - [Fixed] HTML pages are those with extension ".html" and ".htm"
19
+ - [Fixed] Wrong data on summary table of the apache report has
20
+ been fixed
21
+ - [Fixed] Better JavaScript escaping to avoid log poisoning
22
+ - [Fixed] Strengthened the Apache log parser
23
+
5
24
  * 1.3.3 and 1.3.4
6
25
 
7
26
  - [Gem] Moved repository to Github and fixes to gemspec
@@ -6,7 +6,7 @@ module LogSense
6
6
  # @ variables are automatically put in the returned data
7
7
  #
8
8
 
9
- def self.crunch db, options = { limit: 30 }
9
+ def self.crunch db, options = { limit: 900 }
10
10
  first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
11
11
  last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
12
12
 
@@ -89,16 +89,18 @@ module LogSense
89
89
 
90
90
  @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
91
91
  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
92
- @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
93
- @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
94
- @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
95
- @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
96
92
 
97
- @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
98
- "extension != '#{x}'"
99
- }.join " and "
93
+ good_statuses = "(status like '2%' or status like '3%')"
94
+ bad_statuses = "(status like '4%' or status like '5%')"
95
+ html_page = "(extension like '.htm%')"
96
+ non_html_page = "(extension not like '.htm%')"
97
+
98
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
99
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
+
101
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
102
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
103
 
101
- @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
102
104
  @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
103
105
 
104
106
  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
@@ -33,20 +33,19 @@ module LogSense
33
33
 
34
34
  HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
35
  WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT/
36
+ OTHER_METHODS=/SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
37
  METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9])/
38
+ PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
39
  URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^ ]+)/
40
+ REFERER=/(?<referer>[^"]*)/
41
41
  RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
42
  SIZE=/(?<size>[0-9]+|-)/
43
-
44
- USER_AGENT = /(?<user_agent>[^"]+)/
43
+ USER_AGENT = /(?<user_agent>[^"]*)/
45
44
 
46
45
  attr_reader :format
47
46
 
48
47
  def initialize
49
- @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
48
+ @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
50
49
  end
51
50
 
52
51
  def parse line
@@ -41,7 +41,20 @@ module LogSense
41
41
  end
42
42
 
43
43
  def self.escape_javascript(string)
44
- js_escape_map = { "\\" => "\\\\", "</" => '<\/', "\r\n" => '\n', "\n" => '\n', "\r" => '\n', '"' => '\\"', "'" => "\\'", "`" => "\\`", "$" => "\\$" }
44
+ js_escape_map = {
45
+ "<script" => "&lt;script",
46
+ "</script" => "&lt;/script",
47
+ "<" => "&lt;",
48
+ "</" => '&lt;\/',
49
+ "\\" => "\\\\",
50
+ "\r\n" => '\\r\\n',
51
+ "\n" => '\\n',
52
+ "\r" => '\\r',
53
+ '"' => ' \\"',
54
+ "'" => " \\'",
55
+ "`" => " \\`",
56
+ "$" => " \\$"
57
+ }
45
58
  js_escape_map.each do |k, v|
46
59
  string = string.gsub(k, v)
47
60
  end
@@ -8,7 +8,7 @@ module LogSense
8
8
  # parse command line options
9
9
  #
10
10
  def self.parse options
11
- limit = 30
11
+ limit = 900
12
12
  args = {}
13
13
 
14
14
  opt_parser = OptionParser.new do |opts|
@@ -7,7 +7,7 @@ module LogSense
7
7
  # @ variables are automatically put in the returned data
8
8
  #
9
9
 
10
- def self.crunch db, options = { limit: 30 }
10
+ def self.crunch db, options = { limit: 900 }
11
11
  first_day_s = db.execute "SELECT started_at from Event where started_at not NULL order by started_at limit 1"
12
12
  # we could use ended_at to cover the full activity period, but I prefer started_at
13
13
  # with the meaning that the monitor event initiation
@@ -19,6 +19,7 @@ end
19
19
  $(document).ready(function(){
20
20
  $('#table-<%= index %>').dataTable({
21
21
  data: data_<%= index %>,
22
+ <%= report[:datatable_options] + "," if report[:datatable_options] %>
22
23
  columns: [
23
24
  <% report[:header].each do |header| %>
24
25
  { data: '<%= header %>', className: '<%= slugify(header) %>' },
@@ -4,7 +4,8 @@
4
4
  <% report[:rows].each do |row| %>
5
5
  {
6
6
  <% report[:header].each_with_index do |h, i| %>
7
- "<%= h %>": <%= (row[i].class == Integer or row[i].class == Float) ? row[i] : "\"#{Emitter::escape_javascript(row[i] || '')}\"" %>,
7
+ <% resized_row = (row[i] || '').size > 150 ? "#{row[i][0..150]...}" : (row[i] || "") %>
8
+ "<%= h %>": <%= (row[i].class == Integer or row[i].class == Float) ? row[i] : "\"#{Emitter::escape_javascript(resized_row)}\"" %>,
8
9
  <% end %>
9
10
  },
10
11
  <% end %>
@@ -17,7 +17,12 @@
17
17
  <%= data[:total_unique_visits] %> <span class="stats-list-label">Unique Visits</span>
18
18
  </li>
19
19
  <li class="stats-list-negative">
20
- <%= data[:total_unique_visits] != 0 ? data[:total_hits] / data[:total_unique_visits] : "N/A" %>
20
+ <% days = data[:last_day_in_analysis] - data[:first_day_in_analysis] %>
21
+ <%= days > 0 ? "%.2f" % (data[:total_unique_visits] / days.to_f) : "N/A" %>
21
22
  <span class="stats-list-label">Unique Visits / Day</span>
22
23
  </li>
24
+ <li class="stats-list-negative">
25
+ <%= data[:total_unique_visits] != 0 ? data[:total_hits] / data[:total_unique_visits] : "N/A" %>
26
+ <span class="stats-list-label">Page Visited / Unique Visitor</span>
27
+ </li>
23
28
  </ul>
@@ -157,11 +157,10 @@
157
157
  "Log Structure",
158
158
  "Daily Distribution",
159
159
  "Time Distribution",
160
- "Most Requested Pages",
161
- "Most Requested Resources",
162
- "404 on HTML Files",
163
- "404 on other Resources",
164
- "Attacks",
160
+ "20_ and 30_ on HTML pages",
161
+ "20_ and 30_ on other resources",
162
+ "40_ and 50_ on HTML pages",
163
+ "40_ and 50_ on other Resources",
165
164
  "Statuses",
166
165
  "Daily Statuses",
167
166
  "Browsers",
@@ -319,14 +318,26 @@
319
318
  }
320
319
  }
321
320
  },
322
- { title: "Most Requested Pages",
323
- header: ["Path", "Hits", "Visits", "Size"],
321
+ { title: "20_ and 30_ on HTML pages",
322
+ header: ["Path", "Hits", "Visits", "Size", "Status"],
324
323
  rows: data[:most_requested_pages],
324
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
325
+ },
326
+ { title: "20_ and 30_ on other resources",
327
+ header: ["Path", "Hits", "Visits", "Size", "Status"],
328
+ rows: data[:most_requested_resources],
329
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
330
+ },
331
+ { title: "40_ and 50_x on HTML pages",
332
+ header: ["Path", "Hits", "Visits", "Status"],
333
+ rows: data[:missed_pages],
334
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
335
+ },
336
+ { title: "40_ and 50_ on other resources",
337
+ header: ["Path", "Hits", "Visits", "Status"],
338
+ rows: data[:missed_resources],
339
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
325
340
  },
326
- { title: "Most Requested Resources", header: ["Path", "Hits", "Visits", "Size"], rows: data[:most_requested_resources] },
327
- { title: "404 on HTML Files", header: ["Path", "Hits", "Visits"], rows: data[:missed_pages] },
328
- { title: "404 on other Resources", header: ["Path", "Hits", "Visits"], rows: data[:missed_resources] },
329
- { title: "Attacks", header: ["Path", "Hits", "Visits"], rows: data[:attacks], col: "small-12 cell" },
330
341
  { title: "Statuses",
331
342
  header: ["Status", "Count"],
332
343
  rows: data[:statuses],
@@ -494,10 +505,10 @@
494
505
  <th>IP</th>
495
506
  <th>
496
507
  <div class="grid-x grid-margin-x">
497
- <div class="col-2 cell">
508
+ <div class="small-2 cell">
498
509
  Day
499
510
  </div>
500
- <div class="col-10 cell">
511
+ <div class="small-10 cell">
501
512
  Resources
502
513
  </div>
503
514
  </div>
@@ -513,26 +524,25 @@
513
524
  <td class="streaks">
514
525
  <div class="grid-x grid-margin-x">
515
526
  <% date_urls.group_by { |x| x[1] }.each do |date, urls| %>
516
- <div class="col-2 cell">
527
+ <div class="small-12 medium-1 cell">
517
528
  <span class="date"><%= date %></span>
518
529
  </div>
519
- <div class="col-10 cell grid-x">
520
- <div class="small-12 medium-6 cell">
521
- <span class="res-title">HTML:</span>
522
- <ul>
523
- <% urls.map { |x| x[2] }.compact.select { |x| x.match /.*\.html?/ }.each do |url| %>
524
- <li><%= url %></li>
525
- <% end %>
526
- </ul>
527
- </div>
528
- <div class=" small-12 medium-6 cell">
529
- <span class="res-title small-12 medium-6 cell">Other Resources:</span>
530
- <ul>
531
- <% urls.map { |x| x[2] }.compact.sort.select { |x| x and not x.match /.*\.html?/ }.each do |url| %>
532
- <li><%= url %></li>
533
- <% end %>
534
- </ul>
535
- </div>
530
+ <div class="small-12 medium-5 cell">
531
+ <span class="res-title">HTML:</span>
532
+ <% unique_with_count = urls.map { |x| x[2] }.compact.group_by{|e| e}.map{|k, v| [k, v.length]} %>
533
+ <ul class="no-bullet">
534
+ <% unique_with_count.select { |x| x[0].match /.*\.html?/ }.each do |url| %>
535
+ <li>[<%= url[1] %>] <%= Emitter::escape_javascript url[0] %></li>
536
+ <% end %>
537
+ </ul>
538
+ </div>
539
+ <div class=" small-12 medium-5 cell">
540
+ <span class="res-title">Other Resources:</span>
541
+ <ul class="no-bullet">
542
+ <% unique_with_count.select { |x| x[0] and ! x[0].match /.*\.html?/ }.each do |url| %>
543
+ <li>[<%= url[1] %>] <%= Emitter::escape_javascript url[0] %></li>
544
+ <% end %>
545
+ </ul>
536
546
  </div>
537
547
  <% end %>
538
548
  </div>
@@ -1,3 +1,3 @@
1
1
  module LogSense
2
- VERSION = "1.3.5"
2
+ VERSION = "1.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: log_sense
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adolfo Fibrillation
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-26 00:00:00.000000000 Z
11
+ date: 2022-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: browser