danielsdeleo-teeth 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +26 -0
- data/Rakefile +88 -0
- data/ext/extconf.rb +4 -0
- data/ext/tokenize_apache_logs.yy +215 -0
- data/ext/tokenize_apache_logs.yy.c +12067 -0
- data/lib/teeth.rb +1 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/unit/tokenize_apache_spec.rb +106 -0
- metadata +67 -0
data/lib/teeth.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "teeth/tokenize_apache_logs"
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
-c
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'teeth/tokenize_apache_logs'
|
2
|
+
|
3
|
+
def be_greater_than(expected)
|
4
|
+
simple_matcher("be greater than #{expected.to_s}") do |given, matcher|
|
5
|
+
matcher.failure_message = "expected #{given.to_s} to be greater than #{expected.to_s}"
|
6
|
+
matcher.negative_failure_message = "expected #{given.to_s} to not be greater than #{expected.to_s}"
|
7
|
+
given > expected
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
data/spec/unit/tokenize_apache_spec.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
$INCLUDE_SLOW_TESTS = true
|
3
|
+
|
4
|
+
describe "Apache Lexer Extension", "when lexing apache errors" do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
str = "[Sun Nov 30 14:23:45 2008] [error] [client 10.0.1.197] Invalid URI in request GET .\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini HTTP/1.1"
|
8
|
+
@tokens = str.tokenize_apache_logs
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should return an uuid and empty message for an empty string" do
|
12
|
+
tokens = "".tokenize_apache_logs
|
13
|
+
tokens[:message].should == ""
|
14
|
+
tokens[:id].should match(/[0-9A-F]{32}/)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should extract an IP address" do
|
18
|
+
@tokens[:ipv4_addr].first.should == "10.0.1.197"
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should extract an apache datetime" do
|
22
|
+
@tokens[:apache_err_datetime].first.should == "Sun Nov 30 14:23:45 2008"
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should extract the error level" do
|
26
|
+
@tokens[:error_level].first.should == "error"
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should extract the URI" do
|
30
|
+
@tokens[:relative_url].first.should == ".\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini"
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should error out if the string is longer than 1M chars" do
|
34
|
+
str = ((("abcDE" * 2) * 1000) * 100) + "X"
|
35
|
+
lambda {str.tokenize_apache_logs[:word]}.should raise_error(ArgumentError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars")
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
describe "Apache Lexer Extension", "when lexing apache access logs" do
|
41
|
+
before(:each) do
|
42
|
+
str = %q{couchdb.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}
|
43
|
+
@tokens = str.tokenize_apache_logs
|
44
|
+
str2 = %q{127.162.219.29 - - [14/Jan/2009:15:32:32 -0500] "GET /reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt?? HTTP/1.1" 404 5636}
|
45
|
+
@tokens2 = str2.tokenize_apache_logs
|
46
|
+
str3 = %q{127.81.248.53 - - [14/Jan/2009:11:49:43 -0500] "GET /reports/REPORT7_1ART02.pdf HTTP/1.1" 206 255404}
|
47
|
+
@tokens3 = str3.tokenize_apache_logs
|
48
|
+
str4 = %q{127.140.136.56 - - [23/Jan/2009:12:59:24 -0500] "GET /scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir" 404 5607}
|
49
|
+
@tokens4 = str4.tokenize_apache_logs
|
50
|
+
str5 = %q{127.254.43.205 - - [26/Jan/2009:08:32:08 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5673}
|
51
|
+
@tokens5 = str5.tokenize_apache_logs
|
52
|
+
str6 = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5721}
|
53
|
+
@tokens6 = str6.tokenize_apache_logs
|
54
|
+
str_naked_url = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET / HTTP/1.1" 404 5721}
|
55
|
+
@tokens_naked_url = str_naked_url.tokenize_apache_logs
|
56
|
+
end
|
57
|
+
|
58
|
+
it "provides hints for testing" do
|
59
|
+
#puts "\n" + @tokens.inspect + "\n"
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should extract the vhost name" do
|
63
|
+
@tokens[:host].first.should == "couchdb.localdomain:80"
|
64
|
+
end
|
65
|
+
|
66
|
+
it "should extract the datetime" do
|
67
|
+
@tokens[:apache_access_datetime].first.should == "13/Dec/2008:19:26:11 -0500"
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should extract the HTTP response code" do
|
71
|
+
@tokens[:http_response].first.should == "404"
|
72
|
+
#(100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
|
73
|
+
codes = ['100', '101'] + (200 .. 206).map { |n| n.to_s } +
|
74
|
+
(300 .. 305).map { |n| n.to_s } + ['307'] + (400 .. 417).map { |n| n.to_s } +
|
75
|
+
(500 .. 505).map { |n| n.to_s }
|
76
|
+
codes.each do |code|
|
77
|
+
code.tokenize_apache_logs[:http_response].first.should == code
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should extract the HTTP version" do
|
82
|
+
@tokens[:http_version].first.should == "HTTP/1.1"
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should extract the browser string with quotes removed" do
|
86
|
+
@tokens[:browser_string].first.should == "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should not extract an HTTP code when a HTTP response code number appears in the bytes transferred" do
|
90
|
+
#puts "\nTOKENS3:\n" + @tokens3.inspect
|
91
|
+
@tokens3[:http_response].include?("404").should_not be_true
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should correctly identify gnarly URLs from web attacks as URLs" do
|
95
|
+
#puts "\nTOKENS2:\n" + @tokens2.inspect
|
96
|
+
@tokens2[:relative_url].first.should == "/reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt??"
|
97
|
+
@tokens4[:relative_url].first.should == "/scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir"
|
98
|
+
@tokens5[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00"
|
99
|
+
@tokens6[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00"
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should correctly extract ``/'' as a URL" do
|
103
|
+
@tokens_naked_url[:relative_url].should == ["/"]
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: danielsdeleo-teeth
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Daniel DeLeo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-21 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Fast log file parsing in Ruby
|
17
|
+
email: ddeleo@basecommander.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
files:
|
25
|
+
- README.rdoc
|
26
|
+
- Rakefile
|
27
|
+
- ext/extconf.rb
|
28
|
+
- ext/tokenize_apache_logs.yy
|
29
|
+
- ext/tokenize_apache_logs.yy.c
|
30
|
+
- lib/teeth.rb
|
31
|
+
- spec/fixtures/access.log
|
32
|
+
- spec/fixtures/big-access.log
|
33
|
+
- spec/fixtures/big-error.log
|
34
|
+
- spec/fixtures/error.log
|
35
|
+
- spec/fixtures/med-error.log
|
36
|
+
- spec/spec.opts
|
37
|
+
- spec/spec_helper.rb
|
38
|
+
- spec/unit/tokenize_apache_spec.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/danielsdeleo/teeth
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options:
|
43
|
+
- --inline-source
|
44
|
+
- --charset=UTF-8
|
45
|
+
require_paths:
|
46
|
+
- - lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - <=
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 1.9.0
|
52
|
+
version:
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
version:
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project: bloomfilter
|
62
|
+
rubygems_version: 1.2.0
|
63
|
+
signing_key:
|
64
|
+
specification_version: 2
|
65
|
+
summary: Fast log file parsing in Ruby
|
66
|
+
test_files: []
|
67
|
+
|