danielsdeleo-teeth 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/teeth.rb ADDED
@@ -0,0 +1 @@
1
+ require "teeth/tokenize_apache_logs"
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ -c
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'teeth/tokenize_apache_logs'
2
+
3
+ def be_greater_than(expected) # Spec helper: returns a matcher that passes when the value under test is > expected
4
+ simple_matcher("be greater than #{expected.to_s}") do |given, matcher| # argument is the matcher description; block receives the value under test and the matcher object (per RSpec's simple_matcher)
5
+ matcher.failure_message = "expected #{given.to_s} to be greater than #{expected.to_s}" # message for a failed positive expectation
6
+ matcher.negative_failure_message = "expected #{given.to_s} to not be greater than #{expected.to_s}" # message for a failed negated expectation
7
+ given > expected # last expression of the block: the comparison that decides the match
8
+ end
9
+
10
+ end
data/spec/unit/tokenize_apache_spec.rb ADDED
@@ -0,0 +1,106 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+ $INCLUDE_SLOW_TESTS = true # global flag; nothing in this spec file reads it - presumably consulted by slow examples elsewhere (TODO confirm)
3
+
4
+ describe "Apache Lexer Extension", "when lexing apache errors" do # examples for tokenizing a single Apache error-log line
5
+
6
+ before(:each) do # re-tokenizes the sample error-log line before every example
7
+ str = "[Sun Nov 30 14:23:45 2008] [error] [client 10.0.1.197] Invalid URI in request GET .\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini HTTP/1.1"
8
+ @tokens = str.tokenize_apache_logs
9
+ end
10
+
11
+ it "should return an uuid and empty message for an empty string" do
12
+ tokens = "".tokenize_apache_logs
13
+ tokens[:message].should == ""
14
+ tokens[:id].should match(/[0-9A-F]{32}/) # unanchored pattern: the id must contain 32 consecutive uppercase hex characters
15
+ end
16
+
17
+ it "should extract an IP address" do
18
+ @tokens[:ipv4_addr].first.should == "10.0.1.197" # token values are read as lists here; .first takes the first entry
19
+ end
20
+
21
+ it "should extract an apache datetime" do
22
+ @tokens[:apache_err_datetime].first.should == "Sun Nov 30 14:23:45 2008" # expected value excludes the surrounding square brackets
23
+ end
24
+
25
+ it "should extract the error level" do
26
+ @tokens[:error_level].first.should == "error"
27
+ end
28
+
29
+ it "should extract the URI" do
30
+ @tokens[:relative_url].first.should == ".\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini"
31
+ end
32
+
33
+ it "should error out if the string is longer than 1M chars" do
34
+ str = ((("abcDE" * 2) * 1000) * 100) + "X" # 10 chars * 1000 * 100 = 1,000,000 chars, plus "X" = 1,000,001
35
+ lambda {str.tokenize_apache_logs[:word]}.should raise_error(ArgumentError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars") # over-limit input must raise with this exact message
36
+ end
37
+
38
+ end
39
+
40
+ describe "Apache Lexer Extension", "when lexing apache access logs" do # examples for tokenizing Apache access-log lines
41
+ before(:each) do # tokenizes seven sample access-log lines before every example
42
+ str = %q{couchdb.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"} # sample with a leading vhost:port, a quoted URL and a browser string
43
+ @tokens = str.tokenize_apache_logs
44
+ str2 = %q{127.162.219.29 - - [14/Jan/2009:15:32:32 -0500] "GET /reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt?? HTTP/1.1" 404 5636} # query string embeds a full http:// URL
45
+ @tokens2 = str2.tokenize_apache_logs
46
+ str3 = %q{127.81.248.53 - - [14/Jan/2009:11:49:43 -0500] "GET /reports/REPORT7_1ART02.pdf HTTP/1.1" 206 255404} # status is 206; the byte count 255404 ends in "404"
47
+ @tokens3 = str3.tokenize_apache_logs
48
+ str4 = %q{127.140.136.56 - - [23/Jan/2009:12:59:24 -0500] "GET /scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir" 404 5607} # request line carries no HTTP version
49
+ @tokens4 = str4.tokenize_apache_logs
50
+ str5 = %q{127.254.43.205 - - [26/Jan/2009:08:32:08 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5673} # query value is a long ../ chain ending in %00
51
+ @tokens5 = str5.tokenize_apache_logs
52
+ str6 = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5721} # as above, with several parameters and square brackets in the query
53
+ @tokens6 = str6.tokenize_apache_logs
54
+ str_naked_url = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET / HTTP/1.1" 404 5721} # request for the bare root path "/"
55
+ @tokens_naked_url = str_naked_url.tokenize_apache_logs
56
+ end
57
+
58
+ it "provides hints for testing" do # placeholder example: its only line is a commented-out debug print
59
+ #puts "\n" + @tokens.inspect + "\n"
60
+ end
61
+
62
+ it "should extract the vhost name" do
63
+ @tokens[:host].first.should == "couchdb.localdomain:80" # expected host token includes the :80 port suffix
64
+ end
65
+
66
+ it "should extract the datetime" do
67
+ @tokens[:apache_access_datetime].first.should == "13/Dec/2008:19:26:11 -0500" # expected value excludes the surrounding square brackets
68
+ end
69
+
70
+ it "should extract the HTTP response code" do
71
+ @tokens[:http_response].first.should == "404"
72
+ #(100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
73
+ codes = ['100', '101'] + (200 .. 206).map { |n| n.to_s } + # builds the same set of codes as the pattern in the comment above
74
+ (300 .. 305).map { |n| n.to_s } + ['307'] + (400 .. 417).map { |n| n.to_s } +
75
+ (500 .. 505).map { |n| n.to_s }
76
+ codes.each do |code|
77
+ code.tokenize_apache_logs[:http_response].first.should == code # each bare code string must tokenize as an http_response
78
+ end
79
+ end
80
+
81
+ it "should extract the HTTP version" do
82
+ @tokens[:http_version].first.should == "HTTP/1.1"
83
+ end
84
+
85
+ it "should extract the browser string with quotes removed" do
86
+ @tokens[:browser_string].first.should == "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
87
+ end
88
+
89
+ it "should not extract an HTTP code when a HTTP response code number appears in the bytes transferred" do
90
+ #puts "\nTOKENS3:\n" + @tokens3.inspect
91
+ @tokens3[:http_response].include?("404").should_not be_true # "404" occurs only inside the byte count 255404 of the third sample
92
+ end
93
+
94
+ it "should correctly identify gnarly URLs from web attacks as URLs" do
95
+ #puts "\nTOKENS2:\n" + @tokens2.inspect
96
+ @tokens2[:relative_url].first.should == "/reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt??" # whole request path, embedded URL included, as one token
97
+ @tokens4[:relative_url].first.should == "/scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir"
98
+ @tokens5[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00"
99
+ @tokens6[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00"
100
+ end
101
+
102
+ it "should correctly extract ``/'' as a URL" do
103
+ @tokens_naked_url[:relative_url].should == ["/"] # compares the whole list, so exactly one relative_url token is expected
104
+ end
105
+
106
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: danielsdeleo-teeth
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Daniel DeLeo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-21 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Fast log file parsing in Ruby
17
+ email: ddeleo@basecommander.net
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ files:
25
+ - README.rdoc
26
+ - Rakefile
27
+ - ext/extconf.rb
28
+ - ext/tokenize_apache_logs.yy
29
+ - ext/tokenize_apache_logs.yy.c
30
+ - lib/teeth.rb
31
+ - spec/fixtures/access.log
32
+ - spec/fixtures/big-access.log
33
+ - spec/fixtures/big-error.log
34
+ - spec/fixtures/error.log
35
+ - spec/fixtures/med-error.log
36
+ - spec/spec.opts
37
+ - spec/spec_helper.rb
38
+ - spec/unit/tokenize_apache_spec.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/danielsdeleo/teeth
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --inline-source
44
+ - --charset=UTF-8
45
+ require_paths:
46
+ - - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - <=
50
+ - !ruby/object:Gem::Version
51
+ version: 1.9.0
52
+ version:
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project: bloomfilter
62
+ rubygems_version: 1.2.0
63
+ signing_key:
64
+ specification_version: 2
65
+ summary: Fast log file parsing in Ruby
66
+ test_files: []
67
+