danielsdeleo-teeth 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +26 -0
- data/Rakefile +88 -0
- data/ext/extconf.rb +4 -0
- data/ext/tokenize_apache_logs.yy +215 -0
- data/ext/tokenize_apache_logs.yy.c +12067 -0
- data/lib/teeth.rb +1 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/unit/tokenize_apache_spec.rb +106 -0
- metadata +67 -0
data/lib/teeth.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "teeth/tokenize_apache_logs"
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
-c
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'teeth/tokenize_apache_logs'
|
2
|
+
|
3
|
+
# Custom matcher for specs: `actual.should be_greater_than(expected)`.
#
# Built on +simple_matcher+ (presumably the RSpec 1.x helper -- confirm
# against the RSpec version in use). The block records both failure
# messages on the matcher object and then evaluates to the result of
# <tt>actual > expected</tt>.
#
# expected:: the value the actual value is compared against with +>+.
def be_greater_than(expected)
  simple_matcher("be greater than #{expected}") do |actual, matcher_obj|
    # Messages are assigned before the comparison runs, so they are
    # already in place whichever way the comparison turns out.
    matcher_obj.failure_message =
      "expected #{actual} to be greater than #{expected}"
    matcher_obj.negative_failure_message =
      "expected #{actual} to not be greater than #{expected}"

    actual > expected
  end
end
|
data/spec/unit/tokenize_apache_spec.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
$INCLUDE_SLOW_TESTS = true
|
3
|
+
|
4
|
+
# Examples covering +tokenize_apache_logs+ when it is called on a string
# holding an Apache error-log entry.
describe "Apache Lexer Extension", "when lexing apache errors" do

  # Tokenize one sample error-log entry before every example; the examples
  # below read the result from @tokens.
  before(:each) do
    error_line = "[Sun Nov 30 14:23:45 2008] [error] [client 10.0.1.197] Invalid URI in request GET .\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini HTTP/1.1"
    @tokens = error_line.tokenize_apache_logs
  end

  it "should return an uuid and empty message for an empty string" do
    empty_result = "".tokenize_apache_logs
    empty_result[:message].should == ""
    # The id is expected to contain 32 uppercase hexadecimal characters.
    empty_result[:id].should match(/[0-9A-F]{32}/)
  end

  it "should extract an IP address" do
    @tokens[:ipv4_addr].first.should == "10.0.1.197"
  end

  it "should extract an apache datetime" do
    @tokens[:apache_err_datetime].first.should == "Sun Nov 30 14:23:45 2008"
  end

  it "should extract the error level" do
    @tokens[:error_level].first.should == "error"
  end

  it "should extract the URI" do
    @tokens[:relative_url].first.should == ".\\.\\.\\.\\.\\.\\.\\.\\.\\.\\/winnt/win.ini"
  end

  it "should error out if the string is longer than 1M chars" do
    # 5 chars * 200_000 repetitions = 1,000,000 chars; the trailing "X"
    # pushes the input one character past that length.
    oversized = ("abcDE" * 200_000) + "X"
    too_long_message = "string too long for tokenize_apache_logs! max length is 1,000,000 chars"

    lambda { oversized.tokenize_apache_logs[:word] }.should raise_error(ArgumentError, too_long_message)
  end

end
|
39
|
+
|
40
|
+
# Examples covering +tokenize_apache_logs+ when it is called on strings
# holding Apache access-log entries.
describe "Apache Lexer Extension", "when lexing apache access logs" do

  # Tokenize every sample access-log entry before each example. The results
  # are kept in instance variables that the examples below read.
  before(:each) do
    # Entry with a leading vhost name, a referer and a browser string.
    @tokens = %q{couchdb.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}.tokenize_apache_logs

    # Request whose query string embeds a second absolute URL.
    @tokens2 = %q{127.162.219.29 - - [14/Jan/2009:15:32:32 -0500] "GET /reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt?? HTTP/1.1" 404 5636}.tokenize_apache_logs

    # 206 response whose trailing number (255404) contains the digits "404".
    @tokens3 = %q{127.81.248.53 - - [14/Jan/2009:11:49:43 -0500] "GET /reports/REPORT7_1ART02.pdf HTTP/1.1" 206 255404}.tokenize_apache_logs

    # Request line with no HTTP version after the URL.
    @tokens4 = %q{127.140.136.56 - - [23/Jan/2009:12:59:24 -0500] "GET /scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir" 404 5607}.tokenize_apache_logs

    # URLs containing long "../" runs that end in "etc/passwd%00".
    @tokens5 = %q{127.254.43.205 - - [26/Jan/2009:08:32:08 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5673}.tokenize_apache_logs
    @tokens6 = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET /reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00 HTTP/1.1" 404 5721}.tokenize_apache_logs

    # Request for the bare root path "/".
    @tokens_naked_url = %q{127.218.234.82 - - [26/Jan/2009:08:32:19 -0500] "GET / HTTP/1.1" 404 5721}.tokenize_apache_logs
  end

  # Intentionally empty example: uncomment the line below to print the
  # token hash while writing new examples.
  it "provides hints for testing" do
    #puts "\n" + @tokens.inspect + "\n"
  end

  it "should extract the vhost name" do
    @tokens[:host].first.should == "couchdb.localdomain:80"
  end

  it "should extract the datetime" do
    @tokens[:apache_access_datetime].first.should == "13/Dec/2008:19:26:11 -0500"
  end

  it "should extract the HTTP response code" do
    @tokens[:http_response].first.should == "404"

    # Same set of codes as the pattern
    # (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5]),
    # listed in ascending order.
    code_ranges = [100..101, 200..206, 300..305, 307..307, 400..417, 500..505]
    codes = code_ranges.map { |range| range.map { |n| n.to_s } }.flatten

    # Each code, tokenized as a string on its own, must come back as the
    # first :http_response token.
    codes.each do |code|
      code.tokenize_apache_logs[:http_response].first.should == code
    end
  end

  it "should extract the HTTP version" do
    @tokens[:http_version].first.should == "HTTP/1.1"
  end

  it "should extract the browser string with quotes removed" do
    @tokens[:browser_string].first.should == "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
  end

  it "should not extract an HTTP code when a HTTP response code number appears in the bytes transferred" do
    #puts "\nTOKENS3:\n" + @tokens3.inspect
    @tokens3[:http_response].include?("404").should_not be_true
  end

  it "should correctly identify gnarly URLs from web attacks as URLs" do
    #puts "\nTOKENS2:\n" + @tokens2.inspect
    @tokens2[:relative_url].first.should == "/reports//ee_commerce/paypalcart.php?toroot=http://www.shenlishi.com//skin/fxid1.txt??"
    @tokens4[:relative_url].first.should == "/scripts/..%255c%255c../winnt/system32/cmd.exe?/c+dir"
    @tokens5[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/footer.php?admin_template_default=../../../../../../../../../../../../../etc/passwd%00"
    @tokens6[:relative_url].first.should == "/reports/REPORT9_3.pdf//admin/includes/header.php?bypass_installed=1&bypass_restrict=1&row_secure[account_theme]=../../../../../../../../../../../../../etc/passwd%00"
  end

  it "should correctly extract ``/'' as a URL" do
    @tokens_naked_url[:relative_url].should == ["/"]
  end

end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: danielsdeleo-teeth
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Daniel DeLeo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-21 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Fast log file parsing in Ruby
|
17
|
+
email: ddeleo@basecommander.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
files:
|
25
|
+
- README.rdoc
|
26
|
+
- Rakefile
|
27
|
+
- ext/extconf.rb
|
28
|
+
- ext/tokenize_apache_logs.yy
|
29
|
+
- ext/tokenize_apache_logs.yy.c
|
30
|
+
- lib/teeth.rb
|
31
|
+
- spec/fixtures/access.log
|
32
|
+
- spec/fixtures/big-access.log
|
33
|
+
- spec/fixtures/big-error.log
|
34
|
+
- spec/fixtures/error.log
|
35
|
+
- spec/fixtures/med-error.log
|
36
|
+
- spec/spec.opts
|
37
|
+
- spec/spec_helper.rb
|
38
|
+
- spec/unit/tokenize_apache_spec.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/danielsdeleo/teeth
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options:
|
43
|
+
- --inline-source
|
44
|
+
- --charset=UTF-8
|
45
|
+
require_paths:
|
46
|
+
- - lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - <=
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 1.9.0
|
52
|
+
version:
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
version:
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project: bloomfilter
|
62
|
+
rubygems_version: 1.2.0
|
63
|
+
signing_key:
|
64
|
+
specification_version: 2
|
65
|
+
summary: Fast log file parsing in Ruby
|
66
|
+
test_files: []
|
67
|
+
|