squiggle 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +15 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +48 -0
- data/Rakefile +29 -0
- data/lib/squiggle.rb +15 -0
- data/lib/squiggle/base.rb +39 -0
- data/lib/squiggle/chunk_parser.rb +30 -0
- data/lib/squiggle/domain_parser.rb +51 -0
- data/lib/squiggle/log_line.rb +105 -0
- data/lib/squiggle/squid_standard_parser.rb +36 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_domain_parser.rb +26 -0
- data/test/test_helper.rb +54 -0
- data/test/test_log_line.rb +66 -0
- data/test/test_squid_parser.rb +30 -0
- data/test/test_squiggle.rb +6 -0
- metadata +137 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
lib/squiggle.rb
|
7
|
+
lib/squiggle/base.rb
|
8
|
+
lib/squiggle/chunk_parser.rb
|
9
|
+
lib/squiggle/domain_parser.rb
|
10
|
+
lib/squiggle/log_line.rb
|
11
|
+
lib/squiggle/squid_standard_parser.rb
|
12
|
+
script/console
|
13
|
+
script/destroy
|
14
|
+
script/generate
|
15
|
+
test/test_helper.rb
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= squiggle
|
2
|
+
|
3
|
+
* http://github.com/#{github_username}/#{project_name}
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Log line parser
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
Used for NetFox Agent
|
12
|
+
|
13
|
+
== SYNOPSIS:
|
14
|
+
|
15
|
+
TODO
|
16
|
+
|
17
|
+
== REQUIREMENTS:
|
18
|
+
|
19
|
+
* Active Support >= 3.0.3
|
20
|
+
|
21
|
+
== INSTALL:
|
22
|
+
|
23
|
+
* sudo gem install squiggle
|
24
|
+
|
25
|
+
== LICENSE:
|
26
|
+
|
27
|
+
(The MIT License)
|
28
|
+
|
29
|
+
Copyright (c) 2011 Daniel Draper, NetFox
|
30
|
+
|
31
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
32
|
+
a copy of this software and associated documentation files (the
|
33
|
+
'Software'), to deal in the Software without restriction, including
|
34
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
35
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
36
|
+
permit persons to whom the Software is furnished to do so, subject to
|
37
|
+
the following conditions:
|
38
|
+
|
39
|
+
The above copyright notice and this permission notice shall be
|
40
|
+
included in all copies or substantial portions of the Software.
|
41
|
+
|
42
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
43
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
44
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
45
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
46
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
47
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
48
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Build script for the squiggle gem. Hoe (with the newgem plugin) generates
# the packaging/release tasks from the specification below.
require 'rubygems'
gem 'activesupport', '>= 3.0.3'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'

require 'active_support/core_ext/time/zones'

require './lib/squiggle'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'squiggle' do
  self.developer 'Daniel Draper', 'daniel@netfox.com'
  # Publish the *contents* of PostInstall.txt. Assigning the bare file name
  # makes the literal string "PostInstall.txt" the post-install message.
  # TODO remove if post-install message not required
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  self.rubyforge_name = self.name # TODO this is default value
  self.extra_deps = [['domainatrix'], ['activesupport', "~> 3.0.3"]]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
|
data/lib/squiggle.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
require 'active_support/core_ext/time/conversions'
|
5
|
+
require 'active_support/core_ext/time/zones'
|
6
|
+
|
7
|
+
require 'squiggle/base'
|
8
|
+
require 'squiggle/chunk_parser'
|
9
|
+
require 'squiggle/domain_parser'
|
10
|
+
require 'squiggle/log_line'
|
11
|
+
require 'squiggle/squid_standard_parser'
|
12
|
+
|
13
|
+
# Top-level namespace for the squiggle log line parser.
module Squiggle
  # Gem version string; frozen so the shared constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
module Squiggle
  # Shared behaviour for log line parsers.
  #
  # Subclasses must implement +process(line)+ and return an object that
  # responds to +valid?+, +errors+, +mime_type+, +cached=+ and +pageview=+
  # (those are the only messages this class sends to it).
  class Base
    # time_zone:: zone passed to +in_time_zone+ by #parse_timestamp.
    def initialize(time_zone)
      @time_zone = time_zone
    end

    # Parses one raw line via the subclass's +process+ and returns the
    # resulting log line object.
    #
    # Valid lines get their +cached+ and +pageview+ flags filled in. Invalid
    # lines are reported on STDERR, except when the raw line is nil or only
    # whitespace, which is skipped silently.
    def parse(line)
      logline = process(line)
      if logline.valid?
        logline.cached = cached?(logline)
        logline.pageview = pageview?(logline)
      elsif !blank_line?(line)
        STDERR.puts("INVALID LINE (#{logline.errors}): '#{line}'")
      end
      logline
    end

    # True when the line's MIME type contains "html", "text" or "pdf".
    # A nil MIME type matches none of the patterns and yields false.
    def pageview?(logline)
      case logline.mime_type
      when /html/, /text/, /pdf/ then true
      else false
      end
    end

    # Whether the request was served from cache. Always false for now.
    def cached?(logline)
      # TODO Implement this
      false
    end

    # Returns the time in the client's time zone.
    #
    # str:: epoch seconds; anything responding to +to_s+ is accepted. A single
    #       leading "L" is stripped before conversion and any fractional part
    #       is discarded by +to_i+.
    def parse_timestamp(str)
      # \A anchors at the start of the whole string (^ would match after any
      # embedded newline as well).
      seconds = str.to_s.sub(/\AL/, '').to_i
      # Time.at treats the value as seconds since the Unix epoch;
      # in_time_zone then converts it to the client's zone (it is not core
      # Ruby, so presumably the ActiveSupport time extensions must be loaded).
      Time.at(seconds).in_time_zone(@time_zone)
    end

    private

    # Plain-Ruby replacement for ActiveSupport's +blank?+: true for nil,
    # empty, or whitespace-only input.
    def blank_line?(line)
      line.to_s !~ /\S/
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Squiggle
  # Walks a multi-line chunk of log text one valid line at a time.
  class ChunkParser
    # chunk::   String containing newline-separated log lines. Lines of five
    #           characters or fewer are dropped up front.
    # options:: Hash with
    #           :parser    - parser class, instantiated with the time zone and
    #                        asked to +parse+ each line (defaults to
    #                        SquidStandardParser);
    #           :time_zone - passed to the parser's constructor.
    #
    # The caller's options hash is not modified.
    def initialize(chunk, options = {})
      parser_class = options[:parser] || SquidStandardParser
      @parser = parser_class.new(options[:time_zone])
      @lines = chunk.split("\n").select { |e| e.length > 5 }
      # Count before next_line starts consuming the queue.
      received = @lines.size
      @current_line = next_line
      log_info("Chunk Parser received: #{received} lines")
    end

    # True while unparsed lines remain in the queue.
    def has_lines?
      !@lines.empty?
    end

    # The most recently parsed valid line (nil if none has been found).
    def current_line
      @current_line
    end

    # Advances to the next valid line and returns it.
    #
    # Invalid lines are reported on STDERR and skipped. Returns nil when the
    # queue is already empty (leaving #current_line untouched) or when every
    # remaining line turns out to be invalid (in which case #current_line
    # becomes nil).
    def next_line
      return nil if @lines.empty?

      logline = nil
      # Iterate instead of recursing so a long run of invalid lines cannot
      # exhaust the stack.
      until @lines.empty?
        candidate = @parser.parse(@lines.shift)
        unless candidate.invalid?
          logline = candidate
          break
        end
        STDERR.puts "Line is INVALID"
      end
      @current_line = logline
    end

    private

    # Logs through Rails.logger when running inside Rails; a no-op otherwise,
    # since Rails is not a dependency of this gem.
    def log_info(message)
      return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
      Rails.logger.info(message)
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'domainatrix'
|
2
|
+
|
3
|
+
# Extensions to Domainatrix's URL value object used by Squiggle.
class Domainatrix::Url
  # Query string of the URL (populated by Squiggle::DomainParser#parse).
  attr_accessor :query

  # The registrable domain: +domain+ and +public_suffix+ joined with a dot,
  # leaving out whichever of the two is nil.
  def toplevel
    labels = [domain, public_suffix]
    labels.compact.join(".")
  end
end
|
10
|
+
|
11
|
+
module Squiggle
  # Domainatrix parser variant that keeps the query string and tolerates
  # hosts that are not under a known public suffix.
  class DomainParser < Domainatrix::DomainParser

    # Parses +url+ with URI.parse and returns a Domainatrix::Url carrying the
    # scheme, host, path, query and original URL, plus the public suffix /
    # domain / subdomain split computed by #parse_domains_from_host.
    def parse(url)
      uri = URI.parse(url)
      attributes = parse_domains_from_host(uri.host).merge({
        :scheme => uri.scheme,
        :host   => uri.host,
        :path   => uri.path,
        :query  => uri.query,
        :url    => url
      })
      Domainatrix::Url.new(attributes)
    end

    # TODO: This is a big monkey patch - we should be forking and fixing this
    #
    # Splits +host+ into :public_suffix, :domain and :subdomain by walking its
    # labels from right to left through @public_suffixes (presumably the
    # nested suffix hash built by the superclass -- confirm against
    # Domainatrix).
    #
    # Any error raised along the way (unknown top-level label, nil host, a
    # host that is itself a bare suffix, ...) is rescued and the whole host is
    # returned as the :domain with nil suffix and subdomain.
    def parse_domains_from_host(host)
      labels = host.split(".").reverse
      suffix_labels = []
      domain = ""
      subdomains = []
      node = @public_suffixes
      labels.each_with_index do |label, idx|
        node = node[label]
        # Raises on an unknown label (node is nil); handled by the rescue below.
        suffix_ends_here = node.empty? || !node.has_key?(labels[idx + 1])
        suffix_labels << label
        next unless suffix_ends_here

        domain = labels[idx + 1]
        subdomains = labels.slice(idx + 2, labels.size)
        break
      end
      {
        :public_suffix => suffix_labels.reverse.join("."),
        :domain        => domain,
        :subdomain     => subdomains.reverse.join(".")
      }
    rescue
      # Applies to IP Addresses here too
      {:public_suffix => nil, :domain => host, :subdomain => nil}
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Squiggle
  # One parsed proxy log entry plus its validation state.
  class LogLine

    # Shared parser used by #uri=.
    # NOTE(review): the data file path is relative to the process's working
    # directory, not to this file -- confirm that is what callers rely on.
    @@domain_parser = DomainParser.new("lib/effective_tld_names.dat")

    attr_accessor :bytes
    attr_accessor :cache_status
    attr_accessor :cache_sibling
    attr_accessor :cached
    attr_accessor :client_ip
    attr_accessor :created_at
    attr_reader   :http_resp_code
    attr_reader   :mime_type
    attr_accessor :pageview
    attr_reader   :uri
    attr_reader   :username
    attr_accessor :original_line
    # Hash of field name => message, filled in by #uri= and #invalid?.
    attr_reader   :errors

    alias :cached? :cached
    alias :pageview? :pageview

    # Builds a line with +pageview+ and +cached+ defaulting to false, then
    # yields itself so the caller can populate the remaining fields.
    def initialize
      @errors = {}
      @invalid = false
      # Set defaults
      self.pageview = false
      self.cached = false
      yield self if block_given?
      # Give this errors hash a compact "key => message, ..." rendering for
      # use in log output.
      class << @errors
        def to_s
          self.map { |(k,v)| "#{k} => #{v}" }.join(", ")
        end
      end
    end

    # Returns true and records the reason in #errors when the line should be
    # rejected: the URI failed to parse or is missing, the response code is
    # not an integer, or the response code is 407. Returns false otherwise.
    def invalid?
      # TODO: Run more checks and log if a check fails
      return true if @invalid
      if @uri.nil?
        @errors[:uri] = "Missing URL FROM #{@original_line}"
        return true
      end
      # to_s lets nil and Integer codes through the same check; \z (unlike \Z)
      # does not tolerate a trailing newline.
      unless http_resp_code.to_s =~ /\A[+-]?\d+\z/
        @errors[:http_resp_code] = "Invalid HTTP Response Code"
        return true
      end
      if http_resp_code.to_i == 407
        @errors[:http_resp_code] = "407 code is ignored so setting to invalid"
        return true
      end
      false
    end

    # Inverse of #invalid?.
    def valid?
      !invalid?
    end

    # Stores the response code; the placeholder "-" is recorded as "200".
    def http_resp_code=(code)
      code = "200" if code == "-"
      @http_resp_code = code
    end

    # Stores the MIME type; the placeholder "-" is recorded as "Unknown".
    def mime_type=(mt)
      mt = "Unknown" if mt == "-"
      @mime_type = mt
    end

    # Stores the username after percent-decoding it and removing double
    # quotes. nil is treated as an empty string.
    def username=(uname)
      raw = uname || ''
      # URI.decode is obsolete and was removed in Ruby 3.0; the default
      # parser's unescape is the equivalent call. Fall back to URI.decode on
      # rubies that predate URI::DEFAULT_PARSER.
      decoded = if defined?(URI::DEFAULT_PARSER)
                  URI::DEFAULT_PARSER.unescape(raw)
                else
                  URI.decode(raw)
                end
      @username = decoded.gsub(/\"/, '')
    end

    # Parses and stores the request URI.
    #
    # Values without an http:// or https:// prefix (e.g. the "host:port" form
    # logged for CONNECT requests) are treated as https. The check requires
    # the full "scheme://" prefix so that a bare host such as
    # "httpbin.org:443" is still recognised as scheme-less.
    #
    # Any error raised while parsing marks the line invalid and is recorded
    # under errors[:uri].
    def uri=(uri)
      raise "No Domain Parser Set" unless @@domain_parser
      # CONNECT Requests
      unless uri =~ %r{\Ahttps?://}i
        uri = "https://#{uri}"
      end
      @uri = @@domain_parser.parse(uri)
    rescue StandardError => e
      @invalid = true
      @errors[:uri] = "FAILED URL: '#{uri}' (#{e})"
    end

    # Returns the cost for this line as a float
    # TODO: Make the logserver an event machine and use EM::Deferrable here??
    # NOTE(review): PolicyClient is not defined anywhere in this gem; it is
    # presumably supplied by the host application.
    def cost
      pc = PolicyClient.new(self)
      pc.cost
    end

    # Renders the line as a comma-separated row with every field wrapped in
    # double quotes. Field values are not escaped.
    def copy_line
      arr = [ bytes, cached, client_ip, created_at, uri.toplevel, uri.host, http_resp_code, mime_type, (pageview ? 1 : 0), uri.path, uri.scheme, username ]
      arr.map { |entry| "\"#{entry}\"" }.join(",")
    end

    # TODO: URI Escape? quotes around commas?
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Squiggle
  # Based on standard squid log format
  # %ts.%03tu %6tr %>a %Ss/%03Hs %<st %rm %ru %un %Sh/%<A %mt
  # Example:
  # 1253604221.678 19 127.0.0.1 TCP_REFRESH_FAIL_HIT/302 562 GET http://www.gravatar.com/blavatar/e81cfb9068d04d1cfd598533bb380e1f?s=16&d=http://s.wordpress.com/favicon.ico - NONE/- text/html
  #
  # TODO: Write a log format parser like squid's
  class SquidStandardParser < Base
    # Splits +line+ on whitespace and maps the tokens onto a LogLine:
    #
    #   0 timestamp      2 client IP     3 cache status "/" response code
    #   4 bytes          6 URI           7 username
    #   8 cache sibling (text before the first "/")      9 MIME type
    #
    # A nil or whitespace-only line yields an unpopulated LogLine. The
    # caller's string is never modified.
    def process(line)
      return LogLine.new if line.nil?

      # Work on a stripped copy: strip! would mutate the caller's string and
      # raise on a frozen one.
      line = line.strip
      return LogLine.new if line.empty?

      toks = line.split(/\s+/)
      LogLine.new do |ll|
        ll.original_line = line
        ll.created_at    = parse_timestamp(toks[0])
        ll.client_ip     = toks[2]
        ll.cache_status, ll.http_resp_code = (toks[3] || "").split("/")
        ll.bytes         = toks[4].to_i
        ll.uri           = toks[6]
        ll.username      = toks[7]
        # Plain Ruby instead of ActiveSupport's try: nil when the token is
        # missing, otherwise the text before the first "/".
        ll.cache_sibling = toks[8].to_s.split("/")[0]
        ll.mime_type     = toks[9]
      end
    end

    # Whether the request was served from cache. Always false for now.
    def cached?(logline)
      # TODO Implement this
      false
    end
  end
end
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Replaces this process with an irb session that has lib/squiggle.rb preloaded.

# Windows builds (mswin/mingw platforms) ship irb as a batch file.
# (?:...) is a non-capturing group; the original "(:?" was a typo for it.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/squiggle.rb'}"
puts "Loading squiggle gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's "destroy" script for this project.

# Absolute path of the project root (the parent of this script's directory).
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry after loading RubyGems.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's "generate" script for this project.

# Absolute path of the project root (the parent of this script's directory).
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry after loading RubyGems.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
# Checks the registrable domain extracted from various URL shapes.
class DomainParserTest < ActiveSupport::TestCase
  test "ip address form" do
    assert_extracted_domain("192.168.10.100", "http://192.168.10.100/foo/bar")
  end

  test "internal server with no FQDN" do
    assert_extracted_domain("internal", "http://internal/foo/bar")
  end

  test "internal server with FQDN" do
    assert_extracted_domain("internal.highschool", "http://internal.highschool/foo/bar")
  end

  # For every suffix in the data file, the bare, "www." and "subdomain."
  # hosts must all resolve to "website.<suffix>".
  test "valid domains" do
    read_dat_file("lib/effective_tld_names.dat").each do |(tld, children)|
      suffix(tld, children) do |entry|
        expected = "website.#{entry}"
        ["", "www.", "subdomain."].each do |prefix|
          assert_extracted_domain(expected, "http://#{prefix}website.#{entry}/foo/bar?q=abc123")
        end
      end
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'stringio'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'factory_girl'
|
5
|
+
require 'faker'
|
6
|
+
require File.dirname(__FILE__) + '/../lib/squiggle'
|
7
|
+
|
8
|
+
# Factory producing a populated Squiggle::LogLine: random bytes and client IP,
# a Faker-generated domain and user name, HTML content and a 200 response.
Factory.define(:logline, :class => Squiggle::LogLine) do |line|
  line.bytes         { rand(1000) }
  line.cache_status  { '' }
  line.cache_sibling { '' }
  line.created_at    { Time.now }
  # Four dot-separated octets, each in 1..254.
  line.client_ip     { Array.new(4) { rand(254) + 1 }.join(".") }
  line.uri           { "http://#{Faker::Internet.domain_name}" }
  line.username      { Faker::Internet.user_name }
  line.mime_type     { "text/html" }
  line.http_resp_code "200"
end
|
19
|
+
|
20
|
+
# Helpers shared by the domain parsing tests.
class ActiveSupport::TestCase
  # Asserts that parsing +source+ yields +result+ as its toplevel domain.
  # The parser is built once per test instance and reused.
  def assert_extracted_domain(result, source)
    @parser ||= Squiggle::DomainParser.new("lib/effective_tld_names.dat")
    extracted = @parser.parse(source).toplevel
    assert_equal result, extracted
  end

  # Walks the nested suffix hash below +key+ and yields each complete suffix
  # (child labels prepended, "!" characters removed). Children keyed "*" are
  # skipped.
  def suffix(key, values)
    return yield(key.gsub(/\!/, '')) if values.empty? || values == '*'

    values.each do |child, grandchildren|
      next if child == "*"
      suffix("#{child}.#{key}", grandchildren) { |e| yield e.gsub(/\!/, '') }
    end
  end

  protected

  # Reads a suffix list file into a nested hash keyed by label, from the
  # rightmost label inwards. Blank lines and lines containing "//" are
  # ignored. The result is also stored in @public_suffixes.
  def read_dat_file(file_name)
    @public_suffixes = {}
    File.readlines(file_name).each do |raw|
      entry = raw.strip
      next if (entry =~ /\/\//) || entry.empty?

      entry.split(".").reverse.inject(@public_suffixes) do |node, label|
        node[label] ||= {}
      end
    end
    @public_suffixes
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
|
2
|
+
# Validation and field-normalisation tests for Squiggle::LogLine.
class LogLineTest < ActiveSupport::TestCase
  test "basic log line" do
    logline = Factory.build(:logline)
    assert logline.valid?
    assert logline.errors.empty?
  end

  test "non-integer http response code" do
    logline = Factory.build(:logline, :http_resp_code => "gooby")
    assert !logline.valid?
    assert !logline.errors.empty?
    assert logline.errors.has_key?(:http_resp_code)
  end

  test "407 http response code is invalid" do
    logline = Factory.build(:logline, :http_resp_code => "407")
    assert !logline.valid?
    assert !logline.errors.empty?
    assert logline.errors.has_key?(:http_resp_code)
  end

  test "URL is valid" do
    logline = Factory.build(:logline, :uri => "http://www.google.com.au/search?q=bar")
    assert logline.uri
    assert_equal "www.google.com.au", logline.uri.host
    assert_equal "/search", logline.uri.path
    assert_equal "http", logline.uri.scheme
    assert_equal "google.com.au", logline.uri.toplevel
  end

  # CONNECT requests are logged as "host:port" with no scheme.
  test "CONNECT request is valid" do
    logline = Factory.build(:logline, :uri => "www.westpac.com.au:443")
    assert logline.uri
    assert_equal "www.westpac.com.au", logline.uri.host
    assert_equal "", logline.uri.path
    assert_equal "https", logline.uri.scheme
    assert_equal "westpac.com.au", logline.uri.toplevel
  end

  test "ssl URI is valid" do
    logline = Factory.build(:logline, :uri => "https://edsuite.decs.sa.edu.au/login.php")
    assert logline.uri
    assert_equal "edsuite.decs.sa.edu.au", logline.uri.host
    assert_equal "/login.php", logline.uri.path
    assert_equal "https", logline.uri.scheme
    assert_equal "decs.sa.edu.au", logline.uri.toplevel
  end

  test "blank username is valid" do
    logline = Factory.build(:logline, :username => '-')
    assert logline.valid?
    # Expected value first, so a failure message labels the values correctly.
    assert_equal "-", logline.username
  end

  test "digest parsed username is valid" do
    logline = Factory.build(:logline, :username => '%22daniel%20draper%22')
    assert logline.valid?
    # Expected value first, so a failure message labels the values correctly.
    assert_equal "daniel draper", logline.username
  end

  test "copy line" do
    logline = Factory.build(:logline)
    assert logline.copy_line
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
# End-to-end parsing tests for Squiggle::SquidStandardParser.
class ParserTest < ActiveSupport::TestCase
  # TODO: Timestamp parsing
  # TODO: Rename this to SquidParserTest

  test "basic parsing" do
    raw = "1253604221 19 127.0.0.1 TCP_MISS/200 562 GET http://www.gravatar.com/blavatar/e81cfb9068d04d1cfd598533bb380e1f?s=16&d=http://s.wordpress.com/favicon.ico - NONE/- text/html"
    logline = Squiggle::SquidStandardParser.new('Adelaide').parse(raw)

    assert logline.valid?
    assert_equal "2009-09-22 16:53:41 +0930", logline.created_at.to_s
    assert_equal "127.0.0.1", logline.client_ip
    assert_equal false, logline.cached
    assert_equal false, logline.cached? # Method alias
    assert_equal "200", logline.http_resp_code
    assert_equal 562, logline.bytes
  end

  # The three tests below are empty placeholders.
  test "cached status" do

  end

  test "page view status" do

  end

  test "blocked status" do

  end
end
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: squiggle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Daniel Draper
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-03 00:00:00 +10:30
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: domainatrix
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :runtime
|
31
|
+
version_requirements: *id001
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: activesupport
|
34
|
+
prerelease: false
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ~>
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
segments:
|
40
|
+
- 3
|
41
|
+
- 0
|
42
|
+
- 3
|
43
|
+
version: 3.0.3
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rubyforge
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
segments:
|
54
|
+
- 2
|
55
|
+
- 0
|
56
|
+
- 4
|
57
|
+
version: 2.0.4
|
58
|
+
type: :development
|
59
|
+
version_requirements: *id003
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: hoe
|
62
|
+
prerelease: false
|
63
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 2
|
69
|
+
- 6
|
70
|
+
- 0
|
71
|
+
version: 2.6.0
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id004
|
74
|
+
description: Log line parser
|
75
|
+
email:
|
76
|
+
- daniel@netfox.com
|
77
|
+
executables: []
|
78
|
+
|
79
|
+
extensions: []
|
80
|
+
|
81
|
+
extra_rdoc_files:
|
82
|
+
- History.txt
|
83
|
+
- Manifest.txt
|
84
|
+
- PostInstall.txt
|
85
|
+
files:
|
86
|
+
- History.txt
|
87
|
+
- Manifest.txt
|
88
|
+
- PostInstall.txt
|
89
|
+
- README.rdoc
|
90
|
+
- Rakefile
|
91
|
+
- lib/squiggle.rb
|
92
|
+
- lib/squiggle/base.rb
|
93
|
+
- lib/squiggle/chunk_parser.rb
|
94
|
+
- lib/squiggle/domain_parser.rb
|
95
|
+
- lib/squiggle/log_line.rb
|
96
|
+
- lib/squiggle/squid_standard_parser.rb
|
97
|
+
- script/console
|
98
|
+
- script/destroy
|
99
|
+
- script/generate
|
100
|
+
- test/test_helper.rb
|
101
|
+
has_rdoc: true
|
102
|
+
homepage: http://github.com/#{github_username}/#{project_name}
|
103
|
+
licenses: []
|
104
|
+
|
105
|
+
post_install_message: PostInstall.txt
|
106
|
+
rdoc_options:
|
107
|
+
- --main
|
108
|
+
- README.rdoc
|
109
|
+
require_paths:
|
110
|
+
- lib
|
111
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
version: "0"
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
segments:
|
123
|
+
- 0
|
124
|
+
version: "0"
|
125
|
+
requirements: []
|
126
|
+
|
127
|
+
rubyforge_project: squiggle
|
128
|
+
rubygems_version: 1.3.6
|
129
|
+
signing_key:
|
130
|
+
specification_version: 3
|
131
|
+
summary: Log line parser
|
132
|
+
test_files:
|
133
|
+
- test/test_domain_parser.rb
|
134
|
+
- test/test_helper.rb
|
135
|
+
- test/test_log_line.rb
|
136
|
+
- test/test_squid_parser.rb
|
137
|
+
- test/test_squiggle.rb
|