log_sense 1.3.5 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30e8194103003ca9861272072bbf9ef199d7d80b67a0b73fb38d510f23adacee
4
- data.tar.gz: 00b36829dd41b27e79cd6a41cacb412479631c0be9a9e2b79d585f81e0c7efa8
3
+ metadata.gz: 266e20972553f6d409814398dab832334a4c01bfa12e90c74acfdc75ee0b7c8d
4
+ data.tar.gz: 228e6bdc2d931e5190d82fc5ba66660ff6f0de0277a876746154de81a1ffe4e2
5
5
  SHA512:
6
- metadata.gz: 41b392e7d5ec01052dbb645d50b6dc6736a82c04ea231ac33f3fa2c3679cc27a959164a8f5f9524f1604f1b495c032978a2e4e14ca01a706448fc6a8d7556185
7
- data.tar.gz: fea27d4e0765fec9b090101a3efec3eb5ebd162af1c7389162d34c876754980b5dbccb22141b3aa4a0ec3eeb44bfd3936e17cc5463dc93148c3718bb51a1dcec
6
+ metadata.gz: f1454d78cfec258ff3bc69359be29178ebab4cf7ffd2869d736c29f2cffd6efe209f65be59298864bf94de30bd022a3397c91446b043c49e704b6d38ced59357
7
+ data.tar.gz: aa7239af4bb17270a23d9931194859b01a8ff50ebb9cc3c3ed37aeac1702aef413051b0381c57208dcffedd31ccbff5bd0a9485c0fe18947f540cda9f4463acd
data/CHANGELOG.org CHANGED
@@ -2,6 +2,25 @@
2
2
  #+AUTHOR: Adolfo Villafiorita
3
3
  #+STARTUP: showall
4
4
 
5
+ * 1.4.0
6
+
7
+ - [User] The Apache Log report now organizes page requests in four
8
+ tables:
9
+ - success on HTML pages
10
+ - success on other resources
11
+ - failures on HTML pages
12
+ - failures on other resources
13
+ - [User] Increased the default limit of pages in reports to 900
14
+ - [User] The return status is now included in the page and resources
15
+ reports
16
+ - [User] The "Attack" table has been removed, since the data can be
17
+ gotten from the previous tables
18
+ - [Fixed] HTML pages are those with extension ".html" and ".htm"
19
+ - [Fixed] Wrong data on summary table of the apache report has
20
+ been fixed
21
+ - [Fixed] Better JavaScript escaping to avoid log poisoning
22
+ - [Fixed] Strengthened the Apache log parser
23
+
5
24
  * 1.3.3 and 1.3.4
6
25
 
7
26
  - [Gem] Moved repository to Github and fixes to gemspec
@@ -6,7 +6,7 @@ module LogSense
6
6
  # @ variables are automatically put in the returned data
7
7
  #
8
8
 
9
- def self.crunch db, options = { limit: 30 }
9
+ def self.crunch db, options = { limit: 900 }
10
10
  first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
11
11
  last_day_s = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
12
12
 
@@ -89,16 +89,18 @@ module LogSense
89
89
 
90
90
  @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by date(datetime)"
91
91
  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by strftime('%H', datetime)"
92
- @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
93
- @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
94
- @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
95
- @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
96
92
 
97
- @reasonable_requests_exts = [ ".html", ".css", ".js", ".jpg", ".svg", ".png", ".woff", ".xml", ".ttf", ".ico", ".pdf", ".htm", ".txt", ".org" ].map { |x|
98
- "extension != '#{x}'"
99
- }.join " and "
93
+ good_statuses = "(status like '2%' or status like '3%')"
94
+ bad_statuses = "(status like '4%' or status like '5%')"
95
+ html_page = "(extension like '.htm%')"
96
+ non_html_page = "(extension not like '.htm%')"
97
+
98
+ @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
99
+ @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
+
101
+ @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
102
+ @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
100
103
 
101
- @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc limit #{options[:limit]}"
102
104
  @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
103
105
 
104
106
  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
@@ -33,20 +33,19 @@ module LogSense
33
33
 
34
34
  HTTP_METHODS=/GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
35
35
  WEBDAV_METHODS=/COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
36
- OTHER_METHODS=/SEARCH|REPORT/
36
+ OTHER_METHODS=/SEARCH|REPORT|PRI|HEAD\/robots.txt/
37
37
  METHOD=/(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
38
- PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9])/
38
+ PROTOCOL=/(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
39
39
  URL=/(?<url>[^ ]+)/
40
- REFERER=/(?<referer>[^ ]+)/
40
+ REFERER=/(?<referer>[^"]*)/
41
41
  RETURN_CODE=/(?<status>[1-5][0-9][0-9])/
42
42
  SIZE=/(?<size>[0-9]+|-)/
43
-
44
- USER_AGENT = /(?<user_agent>[^"]+)/
43
+ USER_AGENT = /(?<user_agent>[^"]*)/
45
44
 
46
45
  attr_reader :format
47
46
 
48
47
  def initialize
49
- @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
48
+ @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
50
49
  end
51
50
 
52
51
  def parse line
@@ -41,7 +41,20 @@ module LogSense
41
41
  end
42
42
 
43
43
  def self.escape_javascript(string)
44
- js_escape_map = { "\\" => "\\\\", "</" => '<\/', "\r\n" => '\n', "\n" => '\n', "\r" => '\n', '"' => '\\"', "'" => "\\'", "`" => "\\`", "$" => "\\$" }
44
+ js_escape_map = {
45
+ "<script" => "&lt;script",
46
+ "</script" => "&lt;/script",
47
+ "<" => "&lt;",
48
+ "</" => '&lt;\/',
49
+ "\\" => "\\\\",
50
+ "\r\n" => '\\r\\n',
51
+ "\n" => '\\n',
52
+ "\r" => '\\r',
53
+ '"' => ' \\"',
54
+ "'" => " \\'",
55
+ "`" => " \\`",
56
+ "$" => " \\$"
57
+ }
45
58
  js_escape_map.each do |k, v|
46
59
  string = string.gsub(k, v)
47
60
  end
@@ -8,7 +8,7 @@ module LogSense
8
8
  # parse command line options
9
9
  #
10
10
  def self.parse options
11
- limit = 30
11
+ limit = 900
12
12
  args = {}
13
13
 
14
14
  opt_parser = OptionParser.new do |opts|
@@ -7,7 +7,7 @@ module LogSense
7
7
  # @ variables are automatically put in the returned data
8
8
  #
9
9
 
10
- def self.crunch db, options = { limit: 30 }
10
+ def self.crunch db, options = { limit: 900 }
11
11
  first_day_s = db.execute "SELECT started_at from Event where started_at not NULL order by started_at limit 1"
12
12
  # we could use ended_at to cover the full activity period, but I prefer started_at
13
13
  # because it marks the initiation of the monitored event
@@ -19,6 +19,7 @@ end
19
19
  $(document).ready(function(){
20
20
  $('#table-<%= index %>').dataTable({
21
21
  data: data_<%= index %>,
22
+ <%= report[:datatable_options] + "," if report[:datatable_options] %>
22
23
  columns: [
23
24
  <% report[:header].each do |header| %>
24
25
  { data: '<%= header %>', className: '<%= slugify(header) %>' },
@@ -4,7 +4,8 @@
4
4
  <% report[:rows].each do |row| %>
5
5
  {
6
6
  <% report[:header].each_with_index do |h, i| %>
7
- "<%= h %>": <%= (row[i].class == Integer or row[i].class == Float) ? row[i] : "\"#{Emitter::escape_javascript(row[i] || '')}\"" %>,
7
+ <% resized_row = (row[i] || '').size > 150 ? "#{row[i][0..150]...}" : (row[i] || "") %>
8
+ "<%= h %>": <%= (row[i].class == Integer or row[i].class == Float) ? row[i] : "\"#{Emitter::escape_javascript(resized_row)}\"" %>,
8
9
  <% end %>
9
10
  },
10
11
  <% end %>
@@ -17,7 +17,12 @@
17
17
  <%= data[:total_unique_visits] %> <span class="stats-list-label">Unique Visits</span>
18
18
  </li>
19
19
  <li class="stats-list-negative">
20
- <%= data[:total_unique_visits] != 0 ? data[:total_hits] / data[:total_unique_visits] : "N/A" %>
20
+ <% days = data[:last_day_in_analysis] - data[:first_day_in_analysis] %>
21
+ <%= days > 0 ? "%.2f" % (data[:total_unique_visits] / days.to_f) : "N/A" %>
21
22
  <span class="stats-list-label">Unique Visits / Day</span>
22
23
  </li>
24
+ <li class="stats-list-negative">
25
+ <%= data[:total_unique_visits] != 0 ? data[:total_hits] / data[:total_unique_visits] : "N/A" %>
26
+ <span class="stats-list-label">Page Visited / Unique Visitor</span>
27
+ </li>
23
28
  </ul>
@@ -157,11 +157,10 @@
157
157
  "Log Structure",
158
158
  "Daily Distribution",
159
159
  "Time Distribution",
160
- "Most Requested Pages",
161
- "Most Requested Resources",
162
- "404 on HTML Files",
163
- "404 on other Resources",
164
- "Attacks",
160
+ "20_ and 30_ on HTML pages",
161
+ "20_ and 30_ on other resources",
162
+ "40_ and 50_ on HTML pages",
163
+ "40_ and 50_ on other Resources",
165
164
  "Statuses",
166
165
  "Daily Statuses",
167
166
  "Browsers",
@@ -319,14 +318,26 @@
319
318
  }
320
319
  }
321
320
  },
322
- { title: "Most Requested Pages",
323
- header: ["Path", "Hits", "Visits", "Size"],
321
+ { title: "20_ and 30_ on HTML pages",
322
+ header: ["Path", "Hits", "Visits", "Size", "Status"],
324
323
  rows: data[:most_requested_pages],
324
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
325
+ },
326
+ { title: "20_ and 30_ on other resources",
327
+ header: ["Path", "Hits", "Visits", "Size", "Status"],
328
+ rows: data[:most_requested_resources],
329
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
330
+ },
331
+ { title: "40_ and 50_x on HTML pages",
332
+ header: ["Path", "Hits", "Visits", "Status"],
333
+ rows: data[:missed_pages],
334
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
335
+ },
336
+ { title: "40_ and 50_ on other resources",
337
+ header: ["Path", "Hits", "Visits", "Status"],
338
+ rows: data[:missed_resources],
339
+ datatable_options: "columnDefs: [{ width: \"40%\", targets: 0 } ]"
325
340
  },
326
- { title: "Most Requested Resources", header: ["Path", "Hits", "Visits", "Size"], rows: data[:most_requested_resources] },
327
- { title: "404 on HTML Files", header: ["Path", "Hits", "Visits"], rows: data[:missed_pages] },
328
- { title: "404 on other Resources", header: ["Path", "Hits", "Visits"], rows: data[:missed_resources] },
329
- { title: "Attacks", header: ["Path", "Hits", "Visits"], rows: data[:attacks], col: "small-12 cell" },
330
341
  { title: "Statuses",
331
342
  header: ["Status", "Count"],
332
343
  rows: data[:statuses],
@@ -494,10 +505,10 @@
494
505
  <th>IP</th>
495
506
  <th>
496
507
  <div class="grid-x grid-margin-x">
497
- <div class="col-2 cell">
508
+ <div class="small-2 cell">
498
509
  Day
499
510
  </div>
500
- <div class="col-10 cell">
511
+ <div class="small-10 cell">
501
512
  Resources
502
513
  </div>
503
514
  </div>
@@ -513,26 +524,25 @@
513
524
  <td class="streaks">
514
525
  <div class="grid-x grid-margin-x">
515
526
  <% date_urls.group_by { |x| x[1] }.each do |date, urls| %>
516
- <div class="col-2 cell">
527
+ <div class="small-12 medium-1 cell">
517
528
  <span class="date"><%= date %></span>
518
529
  </div>
519
- <div class="col-10 cell grid-x">
520
- <div class="small-12 medium-6 cell">
521
- <span class="res-title">HTML:</span>
522
- <ul>
523
- <% urls.map { |x| x[2] }.compact.select { |x| x.match /.*\.html?/ }.each do |url| %>
524
- <li><%= url %></li>
525
- <% end %>
526
- </ul>
527
- </div>
528
- <div class=" small-12 medium-6 cell">
529
- <span class="res-title small-12 medium-6 cell">Other Resources:</span>
530
- <ul>
531
- <% urls.map { |x| x[2] }.compact.sort.select { |x| x and not x.match /.*\.html?/ }.each do |url| %>
532
- <li><%= url %></li>
533
- <% end %>
534
- </ul>
535
- </div>
530
+ <div class="small-12 medium-5 cell">
531
+ <span class="res-title">HTML:</span>
532
+ <% unique_with_count = urls.map { |x| x[2] }.compact.group_by{|e| e}.map{|k, v| [k, v.length]} %>
533
+ <ul class="no-bullet">
534
+ <% unique_with_count.select { |x| x[0].match /.*\.html?/ }.each do |url| %>
535
+ <li>[<%= url[1] %>] <%= Emitter::escape_javascript url[0] %></li>
536
+ <% end %>
537
+ </ul>
538
+ </div>
539
+ <div class=" small-12 medium-5 cell">
540
+ <span class="res-title">Other Resources:</span>
541
+ <ul class="no-bullet">
542
+ <% unique_with_count.select { |x| x[0] and ! x[0].match /.*\.html?/ }.each do |url| %>
543
+ <li>[<%= url[1] %>] <%= Emitter::escape_javascript url[0] %></li>
544
+ <% end %>
545
+ </ul>
536
546
  </div>
537
547
  <% end %>
538
548
  </div>
@@ -1,3 +1,3 @@
1
1
  module LogSense
2
- VERSION = "1.3.5"
2
+ VERSION = "1.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: log_sense
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adolfo Fibrillation
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-26 00:00:00.000000000 Z
11
+ date: 2022-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: browser