squiggle 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +15 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +48 -0
- data/Rakefile +29 -0
- data/lib/squiggle.rb +15 -0
- data/lib/squiggle/base.rb +39 -0
- data/lib/squiggle/chunk_parser.rb +30 -0
- data/lib/squiggle/domain_parser.rb +51 -0
- data/lib/squiggle/log_line.rb +105 -0
- data/lib/squiggle/squid_standard_parser.rb +36 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_domain_parser.rb +26 -0
- data/test/test_helper.rb +54 -0
- data/test/test_log_line.rb +66 -0
- data/test/test_squid_parser.rb +30 -0
- data/test/test_squiggle.rb +6 -0
- metadata +137 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
lib/squiggle.rb
|
7
|
+
lib/squiggle/base.rb
|
8
|
+
lib/squiggle/chunk_parser.rb
|
9
|
+
lib/squiggle/domain_parser.rb
|
10
|
+
lib/squiggle/log_line.rb
|
11
|
+
lib/squiggle/squid_standard_parser.rb
|
12
|
+
script/console
|
13
|
+
script/destroy
|
14
|
+
script/generate
|
15
|
+
test/test_helper.rb
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= squiggle
|
2
|
+
|
3
|
+
* http://github.com/#{github_username}/#{project_name}
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Log line parser
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
Used for NetFox Agent
|
12
|
+
|
13
|
+
== SYNOPSIS:
|
14
|
+
|
15
|
+
TODO
|
16
|
+
|
17
|
+
== REQUIREMENTS:
|
18
|
+
|
19
|
+
* Active Support >= 3.0.3
|
20
|
+
|
21
|
+
== INSTALL:
|
22
|
+
|
23
|
+
* sudo gem install squiggle
|
24
|
+
|
25
|
+
== LICENSE:
|
26
|
+
|
27
|
+
(The MIT License)
|
28
|
+
|
29
|
+
Copyright (c) 2011 Daniel Draper, NetFox
|
30
|
+
|
31
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
32
|
+
a copy of this software and associated documentation files (the
|
33
|
+
'Software'), to deal in the Software without restriction, including
|
34
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
35
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
36
|
+
permit persons to whom the Software is furnished to do so, subject to
|
37
|
+
the following conditions:
|
38
|
+
|
39
|
+
The above copyright notice and this permission notice shall be
|
40
|
+
included in all copies or substantial portions of the Software.
|
41
|
+
|
42
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
43
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
44
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
45
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
46
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
47
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
48
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'activesupport', '>= 3.0.3'
|
3
|
+
gem 'hoe', '>= 2.1.0'
|
4
|
+
require 'hoe'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
require 'active_support/core_ext/time/zones'
|
8
|
+
|
9
|
+
require './lib/squiggle'
|
10
|
+
|
11
|
+
Hoe.plugin :newgem
|
12
|
+
# Hoe.plugin :website
|
13
|
+
# Hoe.plugin :cucumberfeatures
|
14
|
+
|
15
|
+
# Generate all the Rake tasks
|
16
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
17
|
+
# Gem specification. Hoe derives the rake tasks and the packaged gem metadata
# from the values set in this block.
$hoe = Hoe.spec 'squiggle' do
  self.developer 'Daniel Draper', 'daniel@netfox.com'
  # Ship the *contents* of PostInstall.txt. Assigning the bare file name made
  # the packaged gem's post-install message the literal text "PostInstall.txt".
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt')) # TODO remove if post-install message not required
  self.rubyforge_name       = self.name # TODO this is default value
  self.extra_deps           = [['domainatrix'], ['activesupport', "~> 3.0.3"]]
end
|
23
|
+
|
24
|
+
require 'newgem/tasks'
|
25
|
+
Dir['tasks/**/*.rake'].each { |t| load t }
|
26
|
+
|
27
|
+
# TODO - want other tests/tasks run by default? Add them to the list
|
28
|
+
# remove_task :default
|
29
|
+
# task :default => [:spec, :features]
|
data/lib/squiggle.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Put this file's directory on the load path unless it is already present,
# either as given or in expanded (absolute) form.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end
|
3
|
+
|
4
|
+
require 'active_support/core_ext/time/conversions'
|
5
|
+
require 'active_support/core_ext/time/zones'
|
6
|
+
|
7
|
+
require 'squiggle/base'
|
8
|
+
require 'squiggle/chunk_parser'
|
9
|
+
require 'squiggle/domain_parser'
|
10
|
+
require 'squiggle/log_line'
|
11
|
+
require 'squiggle/squid_standard_parser'
|
12
|
+
|
13
|
+
module Squiggle
  # Gem version string. Frozen so the shared constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
module Squiggle
  # Shared behaviour for log-format parsers.
  #
  # Subclasses supply #process(line), which must return a log-line object
  # responding to #valid?, #errors, #mime_type, #cached= and #pageview=.
  class Base
    # time_zone:: value later handed to Time#in_time_zone by #parse_timestamp.
    def initialize(time_zone)
      @time_zone = time_zone
    end

    # Parses one raw line via #process and, when the result is valid, fills in
    # its cached and pageview flags. Invalid, non-blank lines are reported on
    # STDERR. Always returns the object produced by #process.
    def parse(line)
      logline = process(line)
      if logline.valid?
        logline.cached   = cached?(logline)
        logline.pageview = pageview?(logline)
      elsif line.to_s =~ /\S/
        # Plain-Ruby equivalent of ActiveSupport's String#blank? check, so this
        # method does not depend on that core extension being loaded.
        STDERR.puts("INVALID LINE (#{logline.errors}): '#{line}'")
      end
      logline
    end

    # True when the line's MIME type mentions html, text or pdf.
    def pageview?(logline)
      case logline.mime_type
      when /html/, /text/, /pdf/ then true
      else false
      end
    end

    # Whether the request was served from cache.
    def cached?(logline)
      # TODO Implement this
      false
    end

    # Converts an epoch-seconds string into a time in the client's time zone.
    #
    # A single leading "L" is stripped before conversion and any fractional
    # part is discarded by to_i. Requires ActiveSupport's Time#in_time_zone.
    def parse_timestamp(str)
      # \A anchors at the start of the string; the previous ^ also matched
      # after every embedded newline.
      epoch_seconds = str.sub(/\AL/, '').to_i
      Time.at(epoch_seconds).in_time_zone(@time_zone)
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Squiggle
  # Walks a multi-line chunk of log text, exposing one valid parsed line at a
  # time and silently stepping over lines the parser rejects.
  class ChunkParser
    # chunk::   newline-separated log text; lines of 5 characters or fewer are
    #           dropped up front. nil is treated as an empty chunk.
    # options:: :parser    - parser class to instantiate (defaults to
    #                        SquidStandardParser); it must accept a time zone
    #                        in its constructor and respond to #parse.
    #           :time_zone - passed to the parser's constructor.
    #
    # The options hash is only read, never modified.
    def initialize(chunk, options = {})
      # Honour a caller-supplied parser; previously it was always overwritten.
      parser_class = options[:parser] || SquidStandardParser
      @parser = parser_class.new(options[:time_zone])
      @lines = chunk.to_s.split("\n").select { |e| e.length > 5 }
      # Count before #next_line starts consuming the queue so the log message
      # reports what was actually received.
      received = @lines.size
      @current_line = next_line
      # Rails is optional: only log when a Rails logger is actually available.
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.info("Chunk Parser received: #{received} lines")
      end
    end

    # True while unparsed lines remain in the queue.
    def has_lines?
      !@lines.empty?
    end

    # The most recently parsed valid line (nil when none was found).
    def current_line
      @current_line
    end

    # Advances to the next valid line and returns it.
    #
    # Returns nil without touching #current_line when the queue is already
    # empty. If every remaining line is invalid, #current_line becomes nil and
    # nil is returned. Iterates rather than recursing so a long run of invalid
    # lines cannot exhaust the stack.
    def next_line
      return nil if @lines.empty?
      logline = nil
      until @lines.empty?
        candidate = @parser.parse(@lines.shift)
        unless candidate.invalid?
          logline = candidate
          break
        end
        STDERR.puts "Line is INVALID"
      end
      @current_line = logline
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'domainatrix'
|
2
|
+
|
3
|
+
# Reopens Domainatrix::Url to add a query accessor and a registrable-domain
# helper.
class Domainatrix::Url
  attr_accessor :query

  # Domain and public suffix joined with a dot; whichever of the two is nil
  # is left out.
  def toplevel
    labels = [domain, public_suffix].compact
    labels.join(".")
  end
end
|
10
|
+
|
11
|
+
module Squiggle
  # Domainatrix parser variant that builds the URL from Ruby's URI class and
  # tolerates hosts with no known public suffix (IP addresses, bare internal
  # host names).
  class DomainParser < Domainatrix::DomainParser

    # Parses +url+ and returns a Domainatrix::Url carrying scheme, host, path,
    # query, the original url and the suffix/domain/subdomain split of the host.
    # Errors raised by URI.parse propagate to the caller.
    def parse(url)
      uri = URI.parse(url)
      parsed = Domainatrix::Url.new(parse_domains_from_host(uri.host).merge({
        :scheme => uri.scheme,
        :host   => uri.host,
        :path   => uri.path,
        :query  => uri.query,
        :url    => url
      }))
      # query is an accessor added by this file; set it explicitly so it is
      # populated even if Url#initialize does not read the :query key.
      # NOTE(review): confirm against the installed domainatrix version.
      parsed.query = uri.query
      parsed
    end

    # TODO: This is a big monkey patch - we should be forking and fixing this
    #
    # Splits +host+ into :public_suffix, :domain and :subdomain by walking the
    # reversed host labels down the @public_suffixes tree.
    #
    # When the host cannot be resolved (nil host, no suffix table, unknown
    # top-level label such as the last octet of an IP address, or a host that
    # is nothing but a public suffix) the whole host is returned as :domain
    # with nil suffix and subdomain. These cases are checked explicitly rather
    # than through a blanket rescue, so unrelated errors are no longer hidden.
    def parse_domains_from_host(host)
      unresolved = {:public_suffix => nil, :domain => host, :subdomain => nil}
      return unresolved if host.nil? || @public_suffixes.nil?

      parts = host.split(".").reverse
      suffix_labels = []
      node = @public_suffixes
      parts.each_with_index do |part, i|
        node = node[part]
        # Label is not in the suffix tree at this depth.
        return unresolved if node.nil?
        suffix_labels << part
        # Keep descending while the next label is still part of a longer suffix.
        next if !node.empty? && node.has_key?(parts[i + 1])

        subdomains = parts.slice(i + 2, parts.size)
        # slice returns nil when nothing precedes the suffix, i.e. the host is
        # itself a bare public suffix.
        return unresolved if subdomains.nil?
        return {:public_suffix => suffix_labels.reverse.join("."), :domain => parts[i + 1], :subdomain => subdomains.reverse.join(".")}
      end

      # Only reached when the host has no labels at all (empty string).
      {:public_suffix => "", :domain => "", :subdomain => ""}
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Squiggle
  # One parsed proxy-log entry together with the validation state collected
  # while its fields were assigned.
  class LogLine

    # Parser shared by every instance, used by #uri=.
    # NOTE(review): the path is relative, so it is resolved against the
    # process's working directory when this file is loaded - confirm intended.
    @@domain_parser = DomainParser.new("lib/effective_tld_names.dat")

    attr_accessor :bytes
    attr_accessor :cache_status
    attr_accessor :cache_sibling
    attr_accessor :cached
    attr_accessor :client_ip
    attr_accessor :created_at
    attr_reader :http_resp_code
    attr_reader :mime_type
    attr_accessor :pageview
    attr_reader :uri
    attr_reader :username
    attr_accessor :original_line
    attr_reader :errors

    alias :cached? :cached
    alias :pageview? :pageview

    # Builds a line with pageview and cached defaulted to false, then yields
    # it to the optional block so the caller can assign fields.
    def initialize
      @errors = {}
      # Render the errors hash as "key => message, ..." when interpolated.
      # Installed before yielding so it is in place for errors recorded
      # inside the block as well.
      class << @errors
        def to_s
          self.map { |(k,v)| "#{k} => #{v}" }.join(", ")
        end
      end
      @invalid = false
      # Set defaults
      self.pageview = false
      self.cached = false
      yield self if block_given?
    end

    # True when the line must be discarded: URI parsing failed, no URI was
    # set, the HTTP response code is not an integer, or the code is 407.
    # The reason is recorded in #errors. Returns false otherwise.
    def invalid?
      # TODO: Run more checks and log if a check fails
      return true if @invalid
      if @uri.nil?
        @errors[:uri] = "Missing URL FROM #{@original_line}"
        return true
      end
      # to_s keeps the match well-defined for nil and non-String codes.
      unless http_resp_code.to_s =~ /\A[+-]?\d+\Z/
        @errors[:http_resp_code] = "Invalid HTTP Response Code"
        return true
      end
      if http_resp_code && http_resp_code.to_i == 407
        @errors[:http_resp_code] = "407 code is ignored so setting to invalid"
        return true
      end
      false
    end

    def valid?
      !invalid?
    end

    # Stores the response code; a "-" placeholder is recorded as "200".
    def http_resp_code=(code)
      code = "200" if code == "-"
      @http_resp_code = code
    end

    # Stores the MIME type; a "-" placeholder is recorded as "Unknown".
    def mime_type=(mt)
      mt = "Unknown" if mt == "-"
      @mime_type = mt
    end

    # Stores the percent-decoded user name with double quotes removed.
    # nil is treated as an empty string.
    def username=(uname)
      @username = percent_decode(uname || '').gsub(/\"/, '')
    end

    # Parses and stores the request URI.
    #
    # A nil or blank value leaves the URI unset so #invalid? reports it as
    # missing instead of a bare "https://" being parsed. Values not starting
    # with "http" (CONNECT requests) get "https://" prepended. Any error,
    # including a missing domain parser, marks the line invalid and is
    # recorded under errors[:uri].
    def uri=(uri)
      raise "No Domain Parser Set" unless @@domain_parser
      if uri.nil? || uri.to_s.strip.empty?
        @uri = nil
        return
      end
      # CONNECT Requests
      unless uri =~ /^http/
        uri = "https://#{uri}"
      end
      @uri = @@domain_parser.parse(uri)
    rescue => e
      @invalid = true
      @errors[:uri] = "FAILED URL: '#{uri}' (#{e})"
    end

    # Returns the cost for this line as a float
    # TODO: Make the logserver an event machine and use EM::Deferrable here??
    # NOTE(review): PolicyClient is not defined in this gem - presumably
    # supplied by the host application; confirm.
    def cost
      pc = PolicyClient.new(self)
      pc.cost
    end

    # Serialises the line as one row of double-quoted, comma-separated values.
    # Double quotes inside a value are doubled so the row stays well-formed.
    # Raises NoMethodError when no URI has been set.
    def copy_line
      arr = [ bytes, cached, client_ip, created_at, uri.toplevel, uri.host, http_resp_code, mime_type, (pageview ? 1 : 0), uri.path, uri.scheme, username ]
      arr.map { |entry| "\"#{entry.to_s.gsub('"', '""')}\"" }.join(",")
    end

    # TODO: URI Escape? quotes around commas?

    private

    # Percent-decodes +str+. URI.decode was removed in Ruby 3.0, so fall back
    # to the default parser's unescape when it is not available.
    def percent_decode(str)
      return URI.decode(str) if URI.respond_to?(:decode)
      URI::DEFAULT_PARSER.unescape(str)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Squiggle
  # Based on standard squid log format
  # %ts.%03tu %6tr %>a %Ss/%03Hs %<st %rm %ru %un %Sh/%<A %mt
  # Example:
  # 1253604221.678 19 127.0.0.1 TCP_REFRESH_FAIL_HIT/302 562 GET http://www.gravatar.com/blavatar/e81cfb9068d04d1cfd598533bb380e1f?s=16&d=http://s.wordpress.com/favicon.ico - NONE/- text/html
  #
  # TODO: Write a log format parser like squid's
  class SquidStandardParser < Base
    # Splits one whitespace-separated log line into a LogLine.
    #
    # nil or blank input yields an empty LogLine (which reports itself as
    # invalid because it has no URI). The caller's string is never modified.
    def process(line)
      return LogLine.new if line.nil?

      # Work on a stripped copy: strip! mutated the caller's string and
      # raised on frozen strings.
      line = line.strip
      return LogLine.new if line.empty?

      toks = line.split(/\s+/)
      LogLine.new do |ll|
        ll.original_line = line
        ll.created_at = parse_timestamp(toks[0])
        ll.client_ip = toks[2]
        ll.cache_status, ll.http_resp_code = (toks[3] || "").split("/")
        ll.bytes = toks[4].to_i
        ll.uri = toks[6]
        ll.username = toks[7]
        # First segment of the hierarchy field, or nil when the field is
        # absent. Plain Ruby instead of ActiveSupport's Object#try.
        hierarchy = toks[8]
        ll.cache_sibling = hierarchy && hierarchy.split("/")[0]
        ll.mime_type = toks[9]
      end
    end

    def cached?(logline)
      # TODO Implement this
      false
    end
  end
end
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# File: script/console
|
3
|
+
# Pick the irb launcher for the current platform. (?:...) is a non-capturing
# group; the previous (:?...) was a typo that matched an optional colon.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs =  " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs <<  " -r #{File.dirname(__FILE__) + '/../lib/squiggle.rb'}"
puts "Loading squiggle gem"
# Replace this process with irb, preloading the libraries listed above.
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Absolute path of the directory above script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load RubiGen, retrying through RubyGems when a plain require fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Discard a leading --help / -h argument before handing ARGV to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Absolute path of the directory above script/.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load RubiGen, retrying through RubyGems when a plain require fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Discard a leading --help / -h argument before handing ARGV to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
# Checks the registrable domain extracted from a range of URL shapes.
class DomainParserTest < ActiveSupport::TestCase
  test "ip address form" do
    assert_extracted_domain "192.168.10.100", "http://192.168.10.100/foo/bar"
  end

  test "internal server with no FQDN" do
    assert_extracted_domain "internal", "http://internal/foo/bar"
  end

  test "internal server with FQDN" do
    assert_extracted_domain "internal.highschool", "http://internal.highschool/foo/bar"
  end

  # For every suffix listed in the data file, the bare, www. and subdomain.
  # forms of "website.<suffix>" must all resolve to "website.<suffix>".
  test "valid domains" do
    read_dat_file("lib/effective_tld_names.dat").each do |(label, children)|
      suffix(label, children) do |entry|
        expected = "website.#{entry}"
        ["", "www.", "subdomain."].each do |prefix|
          assert_extracted_domain(expected, "http://#{prefix}website.#{entry}/foo/bar?q=abc123")
        end
      end
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'stringio'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'factory_girl'
|
5
|
+
require 'faker'
|
6
|
+
require File.dirname(__FILE__) + '/../lib/squiggle'
|
7
|
+
|
8
|
+
# Factory for Squiggle::LogLine with randomised but well-formed field values.
Factory.define(:logline, :class => Squiggle::LogLine) do |line|
  line.bytes          { rand(1000) }
  line.cache_status   { '' }
  line.cache_sibling  { '' }
  line.created_at     { Time.now }
  # Four dot-separated octets, each between 1 and 254.
  line.client_ip      { Array.new(4) { rand(254) + 1 }.join(".") }
  line.uri            { "http://#{Faker::Internet.domain_name}" }
  line.username       { Faker::Internet.user_name }
  line.mime_type      { "text/html" }
  line.http_resp_code "200"
end
|
19
|
+
|
20
|
+
# Shared helpers for the domain-parsing tests.
class ActiveSupport::TestCase
  # Asserts that parsing +source+ gives +result+ as its toplevel domain.
  # The parser is built once per test instance and reused.
  def assert_extracted_domain(result, source)
    @parser ||= Squiggle::DomainParser.new("lib/effective_tld_names.dat")
    extracted = @parser.parse(source).toplevel
    assert_equal result, extracted
  end

  # Yields every complete suffix beneath +key+ in the nested suffix hash,
  # with "!" characters removed. Entries keyed "*" are skipped.
  def suffix(key, values)
    return yield(key.gsub(/\!/, '')) if values.empty? || values == '*'

    values.each do |k, v|
      next if k == "*"
      suffix("#{k}.#{key}", v) { |e| yield e.gsub(/\!/, '') }
    end
  end

  protected
  # Reads +file_name+ into a nested hash keyed label by label from the
  # right-most label inwards. Blank lines and lines containing "//" are
  # ignored. The hash is also kept in @public_suffixes.
  def read_dat_file(file_name)
    @public_suffixes = {}
    File.readlines(file_name).each do |raw|
      rule = raw.strip
      next if rule.empty? || (rule =~ /\/\//)

      rule.split(".").reverse.inject(@public_suffixes) do |node, label|
        node[label] ||= {}
      end
    end
    @public_suffixes
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
|
2
|
+
# Validation and field-normalisation tests for Squiggle::LogLine.
class LogLineTest < ActiveSupport::TestCase
  test "basic log line" do
    logline = Factory.build(:logline)
    assert logline.valid?
    assert logline.errors.empty?
  end

  test "non-integer http response code" do
    logline = Factory.build(:logline, :http_resp_code => "gooby")
    assert !logline.valid?
    assert !logline.errors.empty?
    assert logline.errors.has_key?(:http_resp_code)
  end

  test "407 http response code is invalid" do
    logline = Factory.build(:logline, :http_resp_code => "407")
    assert !logline.valid?
    assert !logline.errors.empty?
    assert logline.errors.has_key?(:http_resp_code)
  end

  test "URL is valid" do
    logline = Factory.build(:logline, :uri => "http://www.google.com.au/search?q=bar")
    assert logline.uri
    assert_equal "www.google.com.au", logline.uri.host
    assert_equal "/search", logline.uri.path
    assert_equal "http", logline.uri.scheme
    assert_equal "google.com.au", logline.uri.toplevel
  end

  test "CONNECT request is valid" do
    logline = Factory.build(:logline, :uri => "www.westpac.com.au:443")
    assert logline.uri
    assert_equal "www.westpac.com.au", logline.uri.host
    assert_equal "", logline.uri.path
    assert_equal "https", logline.uri.scheme
    assert_equal "westpac.com.au", logline.uri.toplevel
  end

  test "ssl URI is valid" do
    logline = Factory.build(:logline, :uri => "https://edsuite.decs.sa.edu.au/login.php")
    assert logline.uri
    assert_equal "edsuite.decs.sa.edu.au", logline.uri.host
    assert_equal "/login.php", logline.uri.path
    assert_equal "https", logline.uri.scheme
    assert_equal "decs.sa.edu.au", logline.uri.toplevel
  end

  test "blank username is valid" do
    logline = Factory.build(:logline, :username => '-')
    assert logline.valid?
    # Expected value first, so a failure message labels the values correctly.
    assert_equal "-", logline.username
  end

  test "digest parsed username is valid" do
    logline = Factory.build(:logline, :username => '%22daniel%20draper%22')
    assert logline.valid?
    # Expected value first, so a failure message labels the values correctly.
    assert_equal "daniel draper", logline.username
  end

  test "copy line" do
    logline = Factory.build(:logline)
    assert logline.copy_line
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
# Tests for Squiggle::SquidStandardParser.
class ParserTest < ActiveSupport::TestCase
  # TODO: Timestamp parsing
  # TODO: Rename this to SquidParserTest

  # A well-formed line parses into a valid LogLine with the expected fields.
  test "basic parsing" do
    sample = "1253604221 19 127.0.0.1 TCP_MISS/200 562 GET http://www.gravatar.com/blavatar/e81cfb9068d04d1cfd598533bb380e1f?s=16&d=http://s.wordpress.com/favicon.ico - NONE/- text/html"
    entry = Squiggle::SquidStandardParser.new('Adelaide').parse(sample)

    assert entry.valid?
    assert_equal "2009-09-22 16:53:41 +0930", entry.created_at.to_s
    assert_equal "127.0.0.1", entry.client_ip
    assert_equal false, entry.cached
    assert_equal false, entry.cached? # Method alias
    assert_equal "200", entry.http_resp_code
    assert_equal 562, entry.bytes
  end

  # The tests below have no body yet.
  test "cached status" do

  end

  test "page view status" do

  end

  test "blocked status" do

  end
end
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: squiggle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Daniel Draper
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-03 00:00:00 +10:30
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: domainatrix
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :runtime
|
31
|
+
version_requirements: *id001
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: activesupport
|
34
|
+
prerelease: false
|
35
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ~>
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
segments:
|
40
|
+
- 3
|
41
|
+
- 0
|
42
|
+
- 3
|
43
|
+
version: 3.0.3
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rubyforge
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
segments:
|
54
|
+
- 2
|
55
|
+
- 0
|
56
|
+
- 4
|
57
|
+
version: 2.0.4
|
58
|
+
type: :development
|
59
|
+
version_requirements: *id003
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: hoe
|
62
|
+
prerelease: false
|
63
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 2
|
69
|
+
- 6
|
70
|
+
- 0
|
71
|
+
version: 2.6.0
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id004
|
74
|
+
description: Log line parser
|
75
|
+
email:
|
76
|
+
- daniel@netfox.com
|
77
|
+
executables: []
|
78
|
+
|
79
|
+
extensions: []
|
80
|
+
|
81
|
+
extra_rdoc_files:
|
82
|
+
- History.txt
|
83
|
+
- Manifest.txt
|
84
|
+
- PostInstall.txt
|
85
|
+
files:
|
86
|
+
- History.txt
|
87
|
+
- Manifest.txt
|
88
|
+
- PostInstall.txt
|
89
|
+
- README.rdoc
|
90
|
+
- Rakefile
|
91
|
+
- lib/squiggle.rb
|
92
|
+
- lib/squiggle/base.rb
|
93
|
+
- lib/squiggle/chunk_parser.rb
|
94
|
+
- lib/squiggle/domain_parser.rb
|
95
|
+
- lib/squiggle/log_line.rb
|
96
|
+
- lib/squiggle/squid_standard_parser.rb
|
97
|
+
- script/console
|
98
|
+
- script/destroy
|
99
|
+
- script/generate
|
100
|
+
- test/test_helper.rb
|
101
|
+
has_rdoc: true
|
102
|
+
homepage: http://github.com/#{github_username}/#{project_name}
|
103
|
+
licenses: []
|
104
|
+
|
105
|
+
post_install_message: PostInstall.txt
|
106
|
+
rdoc_options:
|
107
|
+
- --main
|
108
|
+
- README.rdoc
|
109
|
+
require_paths:
|
110
|
+
- lib
|
111
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
version: "0"
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
segments:
|
123
|
+
- 0
|
124
|
+
version: "0"
|
125
|
+
requirements: []
|
126
|
+
|
127
|
+
rubyforge_project: squiggle
|
128
|
+
rubygems_version: 1.3.6
|
129
|
+
signing_key:
|
130
|
+
specification_version: 3
|
131
|
+
summary: Log line parser
|
132
|
+
test_files:
|
133
|
+
- test/test_domain_parser.rb
|
134
|
+
- test/test_helper.rb
|
135
|
+
- test/test_log_line.rb
|
136
|
+
- test/test_squid_parser.rb
|
137
|
+
- test/test_squiggle.rb
|