jls-grok 0.4.7 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/Grok.rb +3 -0
- data/lib/grok-pure.rb +137 -0
- data/lib/grok.rb +3 -2
- data/lib/grok/{match.rb → c-ext/match.rb} +0 -0
- data/lib/grok/{pile.rb → c-ext/pile.rb} +0 -0
- data/lib/grok/pure/match.rb +45 -0
- data/lib/grok/pure/pile.rb +56 -0
- metadata +9 -29
- data/INSTALL +0 -12
- data/Rakefile +0 -12
- data/examples/grok-web.rb +0 -131
- data/examples/pattern-discovery.rb +0 -39
- data/examples/test.rb +0 -30
- data/grok.gemspec +0 -55
- data/test/Makefile +0 -17
- data/test/alltests.rb +0 -8
- data/test/general/basic_test.rb +0 -58
- data/test/general/captures_test.rb +0 -105
- data/test/patterns/day.rb +0 -22
- data/test/patterns/host.rb +0 -30
- data/test/patterns/ip.input +0 -10000
- data/test/patterns/ip.rb +0 -31
- data/test/patterns/iso8601.rb +0 -68
- data/test/patterns/month.rb +0 -24
- data/test/patterns/number.rb +0 -69
- data/test/patterns/path.rb +0 -31
- data/test/patterns/prog.rb +0 -21
- data/test/patterns/quotedstring.rb +0 -54
- data/test/patterns/uri.rb +0 -46
- data/test/run.sh +0 -3
- data/test/speedtest.rb +0 -58
data/test/patterns/ip.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Exercises the %{IP} pattern loaded from the base pattern file: every line of
# the ip.input file next to this test must be captured whole, and a list of
# malformed inputs must not match at all.
class IPPatternsTest < Test::Unit::TestCase
  # Creates a fresh Grok instance and loads the base pattern definitions.
  def setup
    @grok = Grok.new
    path = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok.add_patterns_from_file(path)
  end

  # Each line of ip.input is expected to match and be captured in full.
  def test_ips
    @grok.compile("%{IP}")
    # The block form of File.foreach closes the file when iteration ends,
    # even if an assertion raises; File.open(...).each would leave the
    # handle open.
    File.foreach("#{File.dirname(__FILE__)}/ip.input") do |line|
      line.chomp!
      match = @grok.match(line)
      assert_not_equal(false, match, "Expected #{line} to match IP")
      assert_equal(line, match.captures["IP"][0])
    end
  end

  # None of these inputs may match; the match result is expected to be false.
  def test_non_ips
    @grok.compile("%{IP}")
    nonips = %w{255.255.255.256 0.1.a.33 300.1.2.3 300 400.4.3.a 1.2.3.b
                1..3.4.5 hello world}
    # %w splits on whitespace, so the two-word input is appended separately.
    nonips << "hello world"
    nonips.each do |input|
      match = @grok.match(input)
      assert_equal(false, match, "Expected #{input} to not match IP")
    end
  end
end
|
data/test/patterns/iso8601.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Checks the TIMESTAMP_ISO8601 pattern, anchored at both ends, against
# timestamps that must be captured whole and timestamps that must be rejected.
class ISO8601PatternsTest < Test::Unit::TestCase
  # Timestamps the anchored pattern is expected to capture in full.
  MATCHING_TIMES = [
    "2001-01-01T00:00:00",
    "1974-03-02T04:09:09",
    "2010-05-03T08:18:18+00:00",
    "2004-07-04T12:27:27-00:00",
    "2001-09-05T16:36:36+0000",
    "2001-11-06T20:45:45-0000",
    "2001-12-07T23:54:54Z",
    "2001-01-01T00:00:00.123456",
    "1974-03-02T04:09:09.123456",
    "2010-05-03T08:18:18.123456+00:00",
    "2004-07-04T12:27:27.123456-00:00",
    "2001-09-05T16:36:36.123456+0000",
    "2001-11-06T20:45:45.123456-0000",
    "2001-12-07T23:54:54.123456Z",
    "2001-12-07T23:54:60.123456Z", # '60' second is a leap second.
  ]

  # Timestamps the anchored pattern is expected to reject.
  NON_MATCHING_TIMES = [
    "2001-13-01T00:00:00", # invalid month
    "2001-00-01T00:00:00", # invalid month
    "2001-01-00T00:00:00", # invalid day
    "2001-01-32T00:00:00", # invalid day
    "2001-01-aT00:00:00", # invalid day
    "2001-01-1aT00:00:00", # invalid day
    "2001-01-01Ta0:00:00", # invalid hour
    "2001-01-01T0:00:00", # invalid hour
    "2001-01-01T25:00:00", # invalid hour
    "2001-01-01T01:60:00", # invalid minute
    "2001-01-01T00:aa:00", # invalid minute
    "2001-01-01T00:00:aa", # invalid second
    "2001-01-01T00:00:-1", # invalid second
    "2001-01-01T00:00:61", # invalid second
    "2001-01-01T00:00:00A", # invalid timezone
    "2001-01-01T00:00:00+", # invalid timezone
    "2001-01-01T00:00:00+25", # invalid timezone
    "2001-01-01T00:00:00+2500", # invalid timezone
    "2001-01-01T00:00:00+25:00", # invalid timezone
    "2001-01-01T00:00:00-25", # invalid timezone
    "2001-01-01T00:00:00-2500", # invalid timezone
    "2001-01-01T00:00:00-00:61", # invalid timezone
  ]

  # Loads the base patterns and compiles the anchored timestamp expression
  # once per test.
  def setup
    base_patterns = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok = Grok.new
    @grok.add_patterns_from_file(base_patterns)
    @grok.compile("^%{TIMESTAMP_ISO8601}$")
  end

  # Every valid timestamp must match and be returned as the first capture.
  def test_iso8601
    MATCHING_TIMES.each do |time|
      result = @grok.match(time)
      assert_not_equal(false, result, "Expected #{time} to match TIMESTAMP_ISO8601")
      assert_equal(time, result.captures["TIMESTAMP_ISO8601"][0])
    end
  end

  # Every invalid timestamp must yield a false match result.
  def test_iso8601_nomatch
    NON_MATCHING_TIMES.each do |time|
      assert_equal(false, @grok.match(time), "Expected #{time} to not match TIMESTAMP_ISO8601")
    end
  end
end
|
data/test/patterns/month.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Checks that %{MONTH} captures both abbreviated and full English month names.
class MonthPatternsTest < Test::Unit::TestCase
  # Loads the base patterns and compiles %{MONTH} before each test.
  def setup
    base_patterns = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok = Grok.new
    @grok.add_patterns_from_file(base_patterns)
    @grok.compile("%{MONTH}")
  end

  # Each name must match and come back unchanged as the first MONTH capture.
  def test_months
    %w{Jan January Feb February Mar March Apr April May Jun June Jul July
       Aug August Sep September Oct October Nov November Dec
       December}.each do |month|
      result = @grok.match(month)
      assert_not_equal(false, result, "Expected #{month} to match")
      assert_equal(month, result.captures["MONTH"][0])
    end
  end
end
|
data/test/patterns/number.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Exercises the %{NUMBER} and %{BASE16NUM} patterns from the base pattern file.
class NumberPatternsTest < Test::Unit::TestCase
  # Loads the base patterns; each test compiles the expression it needs.
  def setup
    base_patterns = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok = Grok.new
    @grok.add_patterns_from_file(base_patterns)
  end

  # Integers across -100000..100000 must match and be captured whole.
  def test_match_number
    @grok.compile("%{NUMBER}")
    # step of a prime number near 100 so we get about 2000 iterations
    #puts @grok.expanded_pattern.inspect
    (-100000..100000).step(97) do |value|
      text = value.to_s
      result = @grok.match(text)
      assert_not_equal(false, result, "#{value} should not match false")
      assert_equal(text, result.captures["NUMBER"][0])
    end
  end

  # Pseudo-random floats must match and be captured whole.
  def test_match_number_float
    # Seed with a fixed value so the generated inputs are the same on every
    # run.
    srand(0)
    @grok.compile("%{NUMBER}")
    1001.times do
      text = (rand * 100000 - 50000).to_s
      result = @grok.match(text)
      assert_not_equal(false, result)
      assert_equal(text, result.captures["NUMBER"][0])
    end
  end

  # A number embedded in surrounding text must be the part that is captured.
  def test_match_number_amid_things
    @grok.compile("%{NUMBER}")
    [
      ["hello 12345 world", "12345"],
      ["Something costs $55.4!", "55.4"],
    ].each do |input, expected|
      result = @grok.match(input)
      assert_not_equal(false, result)
      assert_equal(expected, result.captures["NUMBER"][0])
    end
  end

  # Inputs without any number must yield a false match result.
  def test_no_match_number
    @grok.compile("%{NUMBER}")
    ["foo", "", " ", ".", "hello world", "-abcd"].each do |value|
      assert_equal(false, @grok.match(value.to_s))
    end
  end

  # Hex renderings of 0..1000 (bare, 0x-prefixed, zero-padded) must match.
  def test_match_base16num
    @grok.compile("%{BASE16NUM}")
    # Ruby represents negative values in a strange way, so only
    # test positive numbers for now.
    # I don't think anyone uses negative values in hex anyway...
    formats = ["%x", "0x%08x", "%016x"]
    (0..1000).each do |value|
      formats.each do |format|
        hexstr = format % value
        result = @grok.match(hexstr)
        assert_not_equal(false, result)
        assert_equal(hexstr, result.captures["BASE16NUM"][0])
      end
    end
  end
end
|
data/test/patterns/path.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Checks that %{PATH} captures Unix-style and Windows-style paths in full.
class PathPatternsTest < Test::Unit::TestCase
  # Loads the base patterns and compiles %{PATH} before each test.
  def setup
    base_patterns = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok = Grok.new
    @grok.add_patterns_from_file(base_patterns)
    @grok.compile("%{PATH}")
  end

  # Each Unix path must match and be returned unchanged as the PATH capture.
  def test_unix_paths
    %w{/ /usr /usr/bin /usr/bin/foo /etc/motd /home/.test
       /foo/bar//baz //testing /.test /%foo% /asdf/asdf,v}.each do |path|
      result = @grok.match(path)
      assert_not_equal(false, result)
      assert_equal(path, result.captures["PATH"][0])
    end
  end

  # Each Windows path (drive-letter and UNC forms) must match and be returned
  # unchanged as the PATH capture.
  def test_windows_paths
    # The entry containing spaces cannot be written inside %w, so it is
    # added as a regular string literal.
    paths = %w{C:\WINDOWS \\\\Foo\bar \\\\1.2.3.4\C$ \\\\some\path\here.exe} +
            ["C:\\Documents and Settings\\"]
    paths.each do |path|
      result = @grok.match(path)
      assert_not_equal(false, result, "Expected #{path} to match, but it didn't.")
      assert_equal(path, result.captures["PATH"][0])
    end
  end
end
|
data/test/patterns/prog.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
# Checks that %{PROG}, anchored at both ends, accepts typical program names
# and captures them in full.
class ProgPatternsTest < Test::Unit::TestCase
  # Loads the base patterns and compiles the anchored %{PROG} expression.
  def setup
    @grok = Grok.new
    path = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok.add_patterns_from_file(path)
    @grok.compile("^%{PROG}$")
  end

  # Each program name must match and come back unchanged as the PROG capture.
  def test_progs
    progs = %w{kernel foo-bar foo_bar foo/bar/baz}
    progs.each do |prog|
      match = @grok.match(prog)
      # Assert on the match result, not on the input string: a non-empty
      # string never equals false, so checking `prog` here could not fail
      # and a non-match would only surface as an error on the next line.
      assert_not_equal(false, match, "Expected #{prog} to match.")
      assert_equal(prog, match.captures["PROG"][0], "Expected #{prog} to match capture.")
    end
  end
end
|
data/test/patterns/quotedstring.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
#require 'rubygems'
|
2
|
-
require 'grok'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
# Exercises %{QUOTEDSTRING} with double-quote, single-quote and backtick
# delimiters, including escaped delimiters inside and outside the string.
class QuotedStringPatternsTest < Test::Unit::TestCase
  # The three delimiter characters exercised by the matching tests.
  QUOTES = %w{" ' `}

  # Loads the base patterns; each test compiles %{QUOTEDSTRING} itself.
  def setup
    base_patterns = "#{File.dirname(__FILE__)}/../../../patterns/base"
    @grok = Grok.new
    @grok.add_patterns_from_file(base_patterns)
  end

  # Plain and empty strings wrapped in each delimiter must be captured whole.
  def test_quoted_string_common
    @grok.compile("%{QUOTEDSTRING}")
    ["hello", ""].each do |value|
      QUOTES.each do |quote|
        assert_captured_whole("#{quote}#{value}#{quote}")
      end
    end
  end

  # Backslash-escaped delimiters inside the string must not end the capture.
  def test_quoted_string_inside_escape
    @grok.compile("%{QUOTEDSTRING}")
    QUOTES.each do |quote|
      assert_captured_whole("#{quote}hello \\#{quote}world\\#{quote}#{quote}")
    end
  end

  # Text delimited only by escaped quotes must not match.
  def test_escaped_quotes_no_match_quoted_string
    @grok.compile("%{QUOTEDSTRING}")
    assert_none_match(["\\\"testing\\\"", "\\\'testing\\\'", "\\\`testing\\\`",])
  end

  # Text with unbalanced or missing delimiters must not match.
  def test_non_quoted_strings_no_match
    @grok.compile("%{QUOTEDSTRING}")
    assert_none_match(["\\\"testing", "testing", "hello world ' something ` foo"])
  end

  private

  # Asserts that +str+ matches and that the first QUOTEDSTRING capture equals
  # the entire input.
  def assert_captured_whole(str)
    result = @grok.match(str)
    assert_not_equal(false, result)
    assert_equal(str, result.captures["QUOTEDSTRING"][0])
  end

  # Asserts that matching each of +inputs+ yields false.
  def assert_none_match(inputs)
    inputs.each do |value|
      assert_equal(false, @grok.match(value))
    end
  end
end
|
data/test/patterns/uri.rb
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class URIPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("%{URI}")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_urls
|
13
|
-
urls = ["http://www.google.com", "telnet://helloworld",
|
14
|
-
"http://www.example.com/", "http://www.example.com/test.html",
|
15
|
-
"http://www.example.com/test.html?foo=bar",
|
16
|
-
"http://www.example.com/test.html?foo=bar&fizzle=baz",
|
17
|
-
"http://www.example.com:80/test.html?foo=bar&fizzle=baz",
|
18
|
-
"https://www.example.com:443/test.html?foo=bar&fizzle=baz",
|
19
|
-
"https://user@www.example.com:443/test.html?foo=bar&fizzle=baz",
|
20
|
-
"https://user:pass@somehost/fetch.pl",
|
21
|
-
"puppet:///",
|
22
|
-
"http://www.foo.com",
|
23
|
-
"http://www.foo.com/",
|
24
|
-
"http://www.foo.com/?testing",
|
25
|
-
"http://www.foo.com/?one=two",
|
26
|
-
"http://www.foo.com/?one=two&foo=bar",
|
27
|
-
"foo://somehost.com:12345",
|
28
|
-
"foo://user@somehost.com:12345",
|
29
|
-
"foo://user@somehost.com:12345/",
|
30
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz",
|
31
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz?test",
|
32
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz?test=1&sink&foo=4",
|
33
|
-
"http://www.google.com/search?hl=en&source=hp&q=hello+world+%5E%40%23%24&btnG=Google+Search",
|
34
|
-
"http://www.freebsd.org/cgi/url.cgi?ports/sysutils/grok/pkg-descr",
|
35
|
-
"http://www.google.com/search?q=CAPTCHA+ssh&start=0&ie=utf-8&oe=utf-8&client=firefox-a&rls=org.mozilla:en-US:official",
|
36
|
-
"svn+ssh://somehost:12345/testing",
|
37
|
-
]
|
38
|
-
|
39
|
-
urls.each do |url|
|
40
|
-
match = @grok.match(url)
|
41
|
-
assert_not_equal(false, match, "Expected this to match: #{url}")
|
42
|
-
assert_equal(url, match.captures["URI"][0])
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
end
|
data/test/run.sh
DELETED
data/test/speedtest.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'grok'
|
5
|
-
#require 'ruby-prof'
|
6
|
-
require 'pp'
|
7
|
-
|
8
|
-
#RubyProf.start
|
9
|
-
|
10
|
-
# Number of log lines fed to the matcher.
iterations = 100000
# Only referenced by the commented-out plain-Regexp comparison below.
# NOTE(review): the range `A-z` also spans the punctuation between 'Z' and
# 'a' ([ \ ] ^ _ `); presumably `A-Za-z` was intended. Confirm before
# re-enabling the comparison.
pattern = "[A-z0-9_-]*\\[[0-9]+\\]"

grok = Grok.new
# Resolve the pattern file relative to this script rather than the current
# working directory, so the benchmark can be started from any directory.
grok.add_patterns_from_file(File.join(File.dirname(__FILE__), "../../patterns/base"))
grok.compile("%{COMBINEDAPACHELOG}")

#rubyre = Regexp.new("(?<foo>#{pattern})")
#rubyre = Regexp.new(pattern)

# Counters keyed by matcher; the :rubyre entries are only touched by the
# commented-out comparison.
matches = { :grok => 0, :rubyre => 0 }
failures = { :grok => 0, :rubyre => 0 }
|
22
|
-
# Times how long the given block takes to process lines of a log file.
#
# Reads up to +iterations+ lines from +path+ into memory, then calls the block
# once per line and measures only that second phase, so disk I/O does not
# skew the result.
#
# iterations - maximum number of lines to read and pass to the block.
# path       - log file to read; defaults to the location this script has
#              always used.
#
# Returns the elapsed wall-clock time in seconds (Float).
def time(iterations, path = "/b/logs/access", &block)
  # The block form of File.open closes the handle when done. Taking lines
  # from the enumerator stops at end-of-file instead of raising EOFError
  # when the log has fewer than +iterations+ lines.
  data = File.open(path) { |file| file.each_line.first(iterations) }

  start = Time.now
  data.each do |line|
    block.call(line)
  end
  Time.now - start
end
|
31
|
-
|
32
|
-
# Run every log line through grok, counting matches and failures, and keep
# the elapsed seconds reported by time().
groktime = time(iterations) do |line|
  result = grok.match(line)
  unless result
    #puts line
    failures[:grok] += 1
    next
  end
  matches[:grok] += 1
  # Look up a capture so capture access is part of the measured work.
  result.captures["FOO"]
end

#rubyretime = time(iterations) do |line|
  #m = rubyre.match(line)
  #if m
    #matches[:rubyre] += 1
    #m["foo"]
  #end
#end

# Report throughput (matches per second) followed by the raw counters.
puts "Grok: #{matches[:grok] / groktime}"
p matches
p failures
#puts "rubyre: #{rubyretime}"
#puts matches.inspect
#result = RubyProf.stop
#printer = RubyProf::FlatPrinter.new(result)
#printer.print(STDOUT, 0)
|