jls-grok 0.4.7 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Grok.rb +3 -0
- data/lib/grok-pure.rb +137 -0
- data/lib/grok.rb +3 -2
- data/lib/grok/{match.rb → c-ext/match.rb} +0 -0
- data/lib/grok/{pile.rb → c-ext/pile.rb} +0 -0
- data/lib/grok/pure/match.rb +45 -0
- data/lib/grok/pure/pile.rb +56 -0
- metadata +9 -29
- data/INSTALL +0 -12
- data/Rakefile +0 -12
- data/examples/grok-web.rb +0 -131
- data/examples/pattern-discovery.rb +0 -39
- data/examples/test.rb +0 -30
- data/grok.gemspec +0 -55
- data/test/Makefile +0 -17
- data/test/alltests.rb +0 -8
- data/test/general/basic_test.rb +0 -58
- data/test/general/captures_test.rb +0 -105
- data/test/patterns/day.rb +0 -22
- data/test/patterns/host.rb +0 -30
- data/test/patterns/ip.input +0 -10000
- data/test/patterns/ip.rb +0 -31
- data/test/patterns/iso8601.rb +0 -68
- data/test/patterns/month.rb +0 -24
- data/test/patterns/number.rb +0 -69
- data/test/patterns/path.rb +0 -31
- data/test/patterns/prog.rb +0 -21
- data/test/patterns/quotedstring.rb +0 -54
- data/test/patterns/uri.rb +0 -46
- data/test/run.sh +0 -3
- data/test/speedtest.rb +0 -58
data/test/patterns/ip.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class IPPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_ips
|
12
|
-
@grok.compile("%{IP}")
|
13
|
-
File.open("#{File.dirname(__FILE__)}/ip.input").each do |line|
|
14
|
-
line.chomp!
|
15
|
-
match = @grok.match(line)
|
16
|
-
assert_not_equal(false, match)
|
17
|
-
assert_equal(line, match.captures["IP"][0])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_non_ips
|
22
|
-
@grok.compile("%{IP}")
|
23
|
-
nonips = %w{255.255.255.256 0.1.a.33 300.1.2.3 300 400.4.3.a 1.2.3.b
|
24
|
-
1..3.4.5 hello world}
|
25
|
-
nonips << "hello world"
|
26
|
-
nonips.each do |input|
|
27
|
-
match = @grok.match(input)
|
28
|
-
assert_equal(false, match)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
data/test/patterns/iso8601.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class ISO8601PatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("^%{TIMESTAMP_ISO8601}$")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_iso8601
|
13
|
-
times = [
|
14
|
-
"2001-01-01T00:00:00",
|
15
|
-
"1974-03-02T04:09:09",
|
16
|
-
"2010-05-03T08:18:18+00:00",
|
17
|
-
"2004-07-04T12:27:27-00:00",
|
18
|
-
"2001-09-05T16:36:36+0000",
|
19
|
-
"2001-11-06T20:45:45-0000",
|
20
|
-
"2001-12-07T23:54:54Z",
|
21
|
-
"2001-01-01T00:00:00.123456",
|
22
|
-
"1974-03-02T04:09:09.123456",
|
23
|
-
"2010-05-03T08:18:18.123456+00:00",
|
24
|
-
"2004-07-04T12:27:27.123456-00:00",
|
25
|
-
"2001-09-05T16:36:36.123456+0000",
|
26
|
-
"2001-11-06T20:45:45.123456-0000",
|
27
|
-
"2001-12-07T23:54:54.123456Z",
|
28
|
-
"2001-12-07T23:54:60.123456Z", # '60' second is a leap second.
|
29
|
-
]
|
30
|
-
times.each do |time|
|
31
|
-
match = @grok.match(time)
|
32
|
-
assert_not_equal(false, match, "Expected #{time} to match TIMESTAMP_ISO8601")
|
33
|
-
assert_equal(time, match.captures["TIMESTAMP_ISO8601"][0])
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_iso8601_nomatch
|
38
|
-
times = [
|
39
|
-
"2001-13-01T00:00:00", # invalid month
|
40
|
-
"2001-00-01T00:00:00", # invalid month
|
41
|
-
"2001-01-00T00:00:00", # invalid day
|
42
|
-
"2001-01-32T00:00:00", # invalid day
|
43
|
-
"2001-01-aT00:00:00", # invalid day
|
44
|
-
"2001-01-1aT00:00:00", # invalid day
|
45
|
-
"2001-01-01Ta0:00:00", # invalid hour
|
46
|
-
"2001-01-01T0:00:00", # invalid hour
|
47
|
-
"2001-01-01T25:00:00", # invalid hour
|
48
|
-
"2001-01-01T01:60:00", # invalid minute
|
49
|
-
"2001-01-01T00:aa:00", # invalid minute
|
50
|
-
"2001-01-01T00:00:aa", # invalid second
|
51
|
-
"2001-01-01T00:00:-1", # invalid second
|
52
|
-
"2001-01-01T00:00:61", # invalid second
|
53
|
-
"2001-01-01T00:00:00A", # invalid timezone
|
54
|
-
"2001-01-01T00:00:00+", # invalid timezone
|
55
|
-
"2001-01-01T00:00:00+25", # invalid timezone
|
56
|
-
"2001-01-01T00:00:00+2500", # invalid timezone
|
57
|
-
"2001-01-01T00:00:00+25:00", # invalid timezone
|
58
|
-
"2001-01-01T00:00:00-25", # invalid timezone
|
59
|
-
"2001-01-01T00:00:00-2500", # invalid timezone
|
60
|
-
"2001-01-01T00:00:00-00:61", # invalid timezone
|
61
|
-
]
|
62
|
-
times.each do |time|
|
63
|
-
match = @grok.match(time)
|
64
|
-
assert_equal(false, match, "Expected #{time} to not match TIMESTAMP_ISO8601")
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
data/test/patterns/month.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class MonthPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("%{MONTH}")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_months
|
13
|
-
months = ["Jan", "January", "Feb", "February", "Mar", "March", "Apr",
|
14
|
-
"April", "May", "Jun", "June", "Jul", "July", "Aug", "August",
|
15
|
-
"Sep", "September", "Oct", "October", "Nov", "November", "Dec",
|
16
|
-
"December"]
|
17
|
-
months.each do |month|
|
18
|
-
match = @grok.match(month)
|
19
|
-
assert_not_equal(false, match, "Expected #{month} to match")
|
20
|
-
assert_equal(month, match.captures["MONTH"][0])
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
data/test/patterns/number.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class NumberPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_match_number
|
12
|
-
@grok.compile("%{NUMBER}")
|
13
|
-
# step of a prime number near 100 so we get about 2000 iterations
|
14
|
-
#puts @grok.expanded_pattern.inspect
|
15
|
-
-100000.step(100000, 97) do |value|
|
16
|
-
match = @grok.match(value.to_s)
|
17
|
-
assert_not_equal(false, match, "#{value} should not match false")
|
18
|
-
assert_equal(value.to_s, match.captures["NUMBER"][0])
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_match_number_float
|
23
|
-
# generate some random floating point values
|
24
|
-
# always seed with the same random number, so the test is always the same
|
25
|
-
srand(0)
|
26
|
-
@grok.compile("%{NUMBER}")
|
27
|
-
0.upto(1000) do |value|
|
28
|
-
value = (rand * 100000 - 50000).to_s
|
29
|
-
match = @grok.match(value)
|
30
|
-
assert_not_equal(false, match)
|
31
|
-
assert_equal(value, match.captures["NUMBER"][0])
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_match_number_amid_things
|
36
|
-
@grok.compile("%{NUMBER}")
|
37
|
-
value = "hello 12345 world"
|
38
|
-
match = @grok.match(value)
|
39
|
-
assert_not_equal(false, match)
|
40
|
-
assert_equal("12345", match.captures["NUMBER"][0])
|
41
|
-
|
42
|
-
value = "Something costs $55.4!"
|
43
|
-
match = @grok.match(value)
|
44
|
-
assert_not_equal(false, match)
|
45
|
-
assert_equal("55.4", match.captures["NUMBER"][0])
|
46
|
-
end
|
47
|
-
|
48
|
-
def test_no_match_number
|
49
|
-
@grok.compile("%{NUMBER}")
|
50
|
-
["foo", "", " ", ".", "hello world", "-abcd"].each do |value|
|
51
|
-
match = @grok.match(value.to_s)
|
52
|
-
assert_equal(false, match)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_match_base16num
|
57
|
-
@grok.compile("%{BASE16NUM}")
|
58
|
-
# Ruby represents negative values in a strange way, so only
|
59
|
-
# test positive numbers for now.
|
60
|
-
# I don't think anyone uses negative values in hex anyway...
|
61
|
-
0.upto(1000) do |value|
|
62
|
-
[("%x" % value), ("0x%08x" % value), ("%016x" % value)].each do |hexstr|
|
63
|
-
match = @grok.match(hexstr)
|
64
|
-
assert_not_equal(false, match)
|
65
|
-
assert_equal(hexstr, match.captures["BASE16NUM"][0])
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
data/test/patterns/path.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class PathPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("%{PATH}")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_unix_paths
|
13
|
-
paths = %w{/ /usr /usr/bin /usr/bin/foo /etc/motd /home/.test
|
14
|
-
/foo/bar//baz //testing /.test /%foo% /asdf/asdf,v}
|
15
|
-
paths.each do |path|
|
16
|
-
match = @grok.match(path)
|
17
|
-
assert_not_equal(false, match)
|
18
|
-
assert_equal(path, match.captures["PATH"][0])
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_windows_paths
|
23
|
-
paths = %w{C:\WINDOWS \\\\Foo\bar \\\\1.2.3.4\C$ \\\\some\path\here.exe}
|
24
|
-
paths << "C:\\Documents and Settings\\"
|
25
|
-
paths.each do |path|
|
26
|
-
match = @grok.match(path)
|
27
|
-
assert_not_equal(false, match, "Expected #{path} to match, but it didn't.")
|
28
|
-
assert_equal(path, match.captures["PATH"][0])
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
data/test/patterns/prog.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class ProgPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("^%{PROG}$")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_progs
|
13
|
-
progs = %w{kernel foo-bar foo_bar foo/bar/baz}
|
14
|
-
progs.each do |prog|
|
15
|
-
match = @grok.match(prog)
|
16
|
-
assert_not_equal(false, prog, "Expected #{prog} to match.")
|
17
|
-
assert_equal(prog, match.captures["PROG"][0], "Expected #{prog} to match capture.")
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
data/test/patterns/quotedstring.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
#require 'rubygems'
|
2
|
-
require 'grok'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class QuotedStringPatternsTest < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
@grok = Grok.new
|
8
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
9
|
-
@grok.add_patterns_from_file(path)
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_quoted_string_common
|
13
|
-
@grok.compile("%{QUOTEDSTRING}")
|
14
|
-
inputs = ["hello", ""]
|
15
|
-
quotes = %w{" ' `}
|
16
|
-
inputs.each do |value|
|
17
|
-
quotes.each do |quote|
|
18
|
-
str = "#{quote}#{value}#{quote}"
|
19
|
-
match = @grok.match(str)
|
20
|
-
assert_not_equal(false, match)
|
21
|
-
assert_equal(str, match.captures["QUOTEDSTRING"][0])
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_quoted_string_inside_escape
|
27
|
-
@grok.compile("%{QUOTEDSTRING}")
|
28
|
-
quotes = %w{" ' `}
|
29
|
-
quotes.each do |quote|
|
30
|
-
str = "#{quote}hello \\#{quote}world\\#{quote}#{quote}"
|
31
|
-
match = @grok.match(str)
|
32
|
-
assert_not_equal(false, match)
|
33
|
-
assert_equal(str, match.captures["QUOTEDSTRING"][0])
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_escaped_quotes_no_match_quoted_string
|
38
|
-
@grok.compile("%{QUOTEDSTRING}")
|
39
|
-
inputs = ["\\\"testing\\\"", "\\\'testing\\\'", "\\\`testing\\\`",]
|
40
|
-
inputs.each do |value|
|
41
|
-
match = @grok.match(value)
|
42
|
-
assert_equal(false, match)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_non_quoted_strings_no_match
|
47
|
-
@grok.compile("%{QUOTEDSTRING}")
|
48
|
-
inputs = ["\\\"testing", "testing", "hello world ' something ` foo"]
|
49
|
-
inputs.each do |value|
|
50
|
-
match = @grok.match(value)
|
51
|
-
assert_equal(false, match)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
data/test/patterns/uri.rb
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
require 'grok'
|
2
|
-
require 'test/unit'
|
3
|
-
|
4
|
-
class URIPatternsTest < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@grok = Grok.new
|
7
|
-
path = "#{File.dirname(__FILE__)}/../../../patterns/base"
|
8
|
-
@grok.add_patterns_from_file(path)
|
9
|
-
@grok.compile("%{URI}")
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_urls
|
13
|
-
urls = ["http://www.google.com", "telnet://helloworld",
|
14
|
-
"http://www.example.com/", "http://www.example.com/test.html",
|
15
|
-
"http://www.example.com/test.html?foo=bar",
|
16
|
-
"http://www.example.com/test.html?foo=bar&fizzle=baz",
|
17
|
-
"http://www.example.com:80/test.html?foo=bar&fizzle=baz",
|
18
|
-
"https://www.example.com:443/test.html?foo=bar&fizzle=baz",
|
19
|
-
"https://user@www.example.com:443/test.html?foo=bar&fizzle=baz",
|
20
|
-
"https://user:pass@somehost/fetch.pl",
|
21
|
-
"puppet:///",
|
22
|
-
"http://www.foo.com",
|
23
|
-
"http://www.foo.com/",
|
24
|
-
"http://www.foo.com/?testing",
|
25
|
-
"http://www.foo.com/?one=two",
|
26
|
-
"http://www.foo.com/?one=two&foo=bar",
|
27
|
-
"foo://somehost.com:12345",
|
28
|
-
"foo://user@somehost.com:12345",
|
29
|
-
"foo://user@somehost.com:12345/",
|
30
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz",
|
31
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz?test",
|
32
|
-
"foo://user@somehost.com:12345/foo.bar/baz/fizz?test=1&sink&foo=4",
|
33
|
-
"http://www.google.com/search?hl=en&source=hp&q=hello+world+%5E%40%23%24&btnG=Google+Search",
|
34
|
-
"http://www.freebsd.org/cgi/url.cgi?ports/sysutils/grok/pkg-descr",
|
35
|
-
"http://www.google.com/search?q=CAPTCHA+ssh&start=0&ie=utf-8&oe=utf-8&client=firefox-a&rls=org.mozilla:en-US:official",
|
36
|
-
"svn+ssh://somehost:12345/testing",
|
37
|
-
]
|
38
|
-
|
39
|
-
urls.each do |url|
|
40
|
-
match = @grok.match(url)
|
41
|
-
assert_not_equal(false, match, "Expected this to match: #{url}")
|
42
|
-
assert_equal(url, match.captures["URI"][0])
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
end
|
data/test/run.sh
DELETED
data/test/speedtest.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'grok'
|
5
|
-
#require 'ruby-prof'
|
6
|
-
require 'pp'
|
7
|
-
|
8
|
-
#RubyProf.start
|
9
|
-
|
10
|
-
iterations = 100000
|
11
|
-
pattern = "[A-z0-9_-]*\\[[0-9]+\\]"
|
12
|
-
|
13
|
-
grok = Grok.new
|
14
|
-
grok.add_patterns_from_file("../../patterns/base")
|
15
|
-
grok.compile("%{COMBINEDAPACHELOG}")
|
16
|
-
|
17
|
-
#rubyre = Regexp.new("(?<foo>#{pattern})")
|
18
|
-
#rubyre = Regexp.new(pattern)
|
19
|
-
|
20
|
-
matches = { :grok => 0, :rubyre => 0 }
|
21
|
-
failures = { :grok => 0, :rubyre => 0 }
|
22
|
-
def time(iterations, &block)
|
23
|
-
start = Time.now
|
24
|
-
file = File.open("/b/logs/access")
|
25
|
-
data = (1 .. iterations).collect { file.readline() }
|
26
|
-
data.each do |line|
|
27
|
-
block.call(line)
|
28
|
-
end
|
29
|
-
return Time.now - start
|
30
|
-
end
|
31
|
-
|
32
|
-
groktime = time(iterations) do |line|
|
33
|
-
m = grok.match(line)
|
34
|
-
if m
|
35
|
-
matches[:grok] += 1
|
36
|
-
m.captures["FOO"]
|
37
|
-
else
|
38
|
-
#puts line
|
39
|
-
failures[:grok] +=1
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
#rubyretime = time(iterations) do |line|
|
44
|
-
#m = rubyre.match(line)
|
45
|
-
#if m
|
46
|
-
#matches[:rubyre] += 1
|
47
|
-
#m["foo"]
|
48
|
-
#end
|
49
|
-
#end
|
50
|
-
|
51
|
-
puts "Grok: #{matches[:grok] / groktime}"
|
52
|
-
puts matches.inspect
|
53
|
-
puts failures.inspect
|
54
|
-
#puts "rubyre: #{rubyretime}"
|
55
|
-
#puts matches.inspect
|
56
|
-
#result = RubyProf.stop
|
57
|
-
#printer = RubyProf::FlatPrinter.new(result)
|
58
|
-
#printer.print(STDOUT, 0)
|