jls-grok 0.4.7 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Grok.rb +3 -0
- data/lib/grok-pure.rb +137 -0
- data/lib/grok.rb +3 -2
- data/lib/grok/{match.rb → c-ext/match.rb} +0 -0
- data/lib/grok/{pile.rb → c-ext/pile.rb} +0 -0
- data/lib/grok/pure/match.rb +45 -0
- data/lib/grok/pure/pile.rb +56 -0
- metadata +9 -29
- data/INSTALL +0 -12
- data/Rakefile +0 -12
- data/examples/grok-web.rb +0 -131
- data/examples/pattern-discovery.rb +0 -39
- data/examples/test.rb +0 -30
- data/grok.gemspec +0 -55
- data/test/Makefile +0 -17
- data/test/alltests.rb +0 -8
- data/test/general/basic_test.rb +0 -58
- data/test/general/captures_test.rb +0 -105
- data/test/patterns/day.rb +0 -22
- data/test/patterns/host.rb +0 -30
- data/test/patterns/ip.input +0 -10000
- data/test/patterns/ip.rb +0 -31
- data/test/patterns/iso8601.rb +0 -68
- data/test/patterns/month.rb +0 -24
- data/test/patterns/number.rb +0 -69
- data/test/patterns/path.rb +0 -31
- data/test/patterns/prog.rb +0 -21
- data/test/patterns/quotedstring.rb +0 -54
- data/test/patterns/uri.rb +0 -46
- data/test/run.sh +0 -3
- data/test/speedtest.rb +0 -58
data/lib/Grok.rb
ADDED
data/lib/grok-pure.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
|
3
|
+
# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
|
4
|
+
class Grok
|
5
|
+
attr_accessor :pattern
|
6
|
+
attr_accessor :expanded_pattern
|
7
|
+
|
8
|
+
PATTERN_RE = \
|
9
|
+
/%{ # match '%{' not prefixed with '\'
|
10
|
+
(?<name> # match the pattern name
|
11
|
+
(?<pattern>[A-z0-9]+)
|
12
|
+
(?::(?<subname>[A-z0-9_:]+))?
|
13
|
+
)
|
14
|
+
(?:=(?<definition>
|
15
|
+
(?:
|
16
|
+
(?:[^{}\\]+|\\.+)+
|
17
|
+
|
|
18
|
+
(?<curly>\{(?:(?>[^{}]+|(?>\\[{}])+)|(\g<curly>))*\})+
|
19
|
+
)+
|
20
|
+
))?
|
21
|
+
[^}]*
|
22
|
+
}/x
|
23
|
+
|
24
|
+
GROK_OK = 0
|
25
|
+
GROK_ERROR_FILE_NOT_ACCESSIBLE = 1
|
26
|
+
GROK_ERROR_PATTERN_NOT_FOUND = 2
|
27
|
+
GROK_ERROR_UNEXPECTED_READ_SIZE = 3
|
28
|
+
GROK_ERROR_COMPILE_FAILED = 4
|
29
|
+
GROK_ERROR_UNINITIALIZED = 5
|
30
|
+
GROK_ERROR_PCRE_ERROR = 6
|
31
|
+
GROK_ERROR_NOMATCH = 7
|
32
|
+
|
33
|
+
public
|
34
|
+
def initialize
|
35
|
+
@patterns = {}
|
36
|
+
|
37
|
+
# TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
|
38
|
+
end # def initialize
|
39
|
+
|
40
|
+
public
|
41
|
+
def add_pattern(name, pattern)
|
42
|
+
#puts "#{name} => #{pattern}"
|
43
|
+
@patterns[name] = pattern
|
44
|
+
return nil
|
45
|
+
end
|
46
|
+
|
47
|
+
public
|
48
|
+
def add_patterns_from_file(path)
|
49
|
+
file = File.new(path, "r")
|
50
|
+
file.each do |line|
|
51
|
+
next if line =~ /^\s*#/
|
52
|
+
#puts "Pattern: #{line}"
|
53
|
+
name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
|
54
|
+
next if pattern.nil?
|
55
|
+
add_pattern(name, pattern.chomp)
|
56
|
+
end
|
57
|
+
return nil
|
58
|
+
end # def add_patterns_from_file
|
59
|
+
|
60
|
+
public
|
61
|
+
def compile(pattern)
|
62
|
+
@capture_map = {}
|
63
|
+
|
64
|
+
iterations_left = 100
|
65
|
+
@pattern = pattern
|
66
|
+
@expanded_pattern = pattern
|
67
|
+
index = 0
|
68
|
+
|
69
|
+
# Replace any instances of '%{FOO}' with that pattern.
|
70
|
+
loop do
|
71
|
+
if iterations_left == 0
|
72
|
+
raise "Deep recursion pattern compilation of #{pattern.inspect} - expanded: #{@expanded_pattern.inspect}"
|
73
|
+
end
|
74
|
+
iterations_left -= 1
|
75
|
+
m = PATTERN_RE.match(@expanded_pattern)
|
76
|
+
break if !m
|
77
|
+
|
78
|
+
if m["definition"]
|
79
|
+
add_pattern(m["pattern"], m["definition"])
|
80
|
+
end
|
81
|
+
|
82
|
+
if @patterns.include?(m["pattern"])
|
83
|
+
# create a named capture index that we can push later as the named
|
84
|
+
# pattern. We do this because ruby regexp can't capture something
|
85
|
+
# by the same name twice.
|
86
|
+
p = @patterns[m["pattern"]]
|
87
|
+
|
88
|
+
capture = "a#{index}" # named captures have to start with letters?
|
89
|
+
#capture = "%04d" % "#{index}" # named captures have to start with letters?
|
90
|
+
replacement_pattern = "(?<#{capture}>#{p})"
|
91
|
+
#p(:input => m[0], :pattern => replacement_pattern)
|
92
|
+
@capture_map[capture] = m["name"]
|
93
|
+
@expanded_pattern.sub!(m[0], replacement_pattern)
|
94
|
+
index += 1
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
@regexp = Regexp.new(@expanded_pattern)
|
99
|
+
end # def compile
|
100
|
+
|
101
|
+
public
|
102
|
+
def match(text)
|
103
|
+
match = @regexp.match(text)
|
104
|
+
|
105
|
+
if match
|
106
|
+
grokmatch = Grok::Match.new
|
107
|
+
grokmatch.subject = text
|
108
|
+
grokmatch.start, grokmatch.end = match.offset(0)
|
109
|
+
grokmatch.grok = self
|
110
|
+
grokmatch.match = match
|
111
|
+
return grokmatch
|
112
|
+
else
|
113
|
+
return false
|
114
|
+
end
|
115
|
+
end # def match
|
116
|
+
|
117
|
+
public
|
118
|
+
def discover(input)
|
119
|
+
init_discover if @discover == nil
|
120
|
+
|
121
|
+
return @discover.discover(input)
|
122
|
+
end # def discover
|
123
|
+
|
124
|
+
private
|
125
|
+
def init_discover
|
126
|
+
@discover = GrokDiscover.new(self)
|
127
|
+
@discover.logmask = logmask
|
128
|
+
end # def init_discover
|
129
|
+
|
130
|
+
public
|
131
|
+
def capture_name(id)
|
132
|
+
return @capture_map[id]
|
133
|
+
end # def capture_name
|
134
|
+
end # Grok
|
135
|
+
|
136
|
+
require "grok/pure/match"
|
137
|
+
require "grok/pure/pile"
|
data/lib/grok.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "rubygems"
|
2
2
|
require "ffi"
|
3
3
|
|
4
|
+
# TODO(sissel): Check if 'grok-pure' has been loaded and abort?
|
4
5
|
class Grok < FFI::Struct
|
5
6
|
module CGrok
|
6
7
|
extend FFI::Library
|
@@ -128,5 +129,5 @@ class Grok < FFI::Struct
|
|
128
129
|
end
|
129
130
|
end # Grok
|
130
131
|
|
131
|
-
require "grok/match"
|
132
|
-
require "grok/pile"
|
132
|
+
require "grok/c-ext/match"
|
133
|
+
require "grok/c-ext/pile"
|
File without changes
|
File without changes
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "grok-pure"
|
2
|
+
|
3
|
+
class Grok::Match
|
4
|
+
attr_accessor :subject
|
5
|
+
attr_accessor :start
|
6
|
+
attr_accessor :end
|
7
|
+
attr_accessor :grok
|
8
|
+
attr_accessor :match
|
9
|
+
|
10
|
+
public
|
11
|
+
def initialize
|
12
|
+
@captures = nil
|
13
|
+
end
|
14
|
+
|
15
|
+
public
|
16
|
+
def each_capture
|
17
|
+
@captures = Hash.new { |h, k| h[k] = Array.new }
|
18
|
+
|
19
|
+
#p :expanded => @grok.expanded_pattern
|
20
|
+
#p :map => @grok.capture_map
|
21
|
+
@match.names.zip(@match.captures).each do |id, value|
|
22
|
+
#p :match => id, :value => value
|
23
|
+
name = @grok.capture_name(id)
|
24
|
+
#next if value == nil
|
25
|
+
yield name, value
|
26
|
+
end
|
27
|
+
|
28
|
+
end # def each_capture
|
29
|
+
|
30
|
+
public
|
31
|
+
def captures
|
32
|
+
if @captures.nil?
|
33
|
+
@captures = Hash.new { |h,k| h[k] = [] }
|
34
|
+
each_capture do |key, val|
|
35
|
+
@captures[key] << val
|
36
|
+
end
|
37
|
+
end
|
38
|
+
return @captures
|
39
|
+
end # def captures
|
40
|
+
|
41
|
+
public
|
42
|
+
def [](name)
|
43
|
+
return captures[name]
|
44
|
+
end # def []
|
45
|
+
end # Grok::Match
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "grok-pure"
|
2
|
+
|
3
|
+
# A grok pile is an easy way to have multiple patterns together so
|
4
|
+
# that you can try to match against each one.
|
5
|
+
# The API provided should be similar to the normal Grok
|
6
|
+
# interface, but you can compile multiple patterns and match will
|
7
|
+
# try each one until a match is found.
|
8
|
+
class Grok
|
9
|
+
class Pile
|
10
|
+
def initialize
|
11
|
+
@groks = []
|
12
|
+
@patterns = {}
|
13
|
+
@pattern_files = []
|
14
|
+
end # def initialize
|
15
|
+
|
16
|
+
# see Grok#add_pattern
|
17
|
+
def add_pattern(name, string)
|
18
|
+
@patterns[name] = string
|
19
|
+
end # def add_pattern
|
20
|
+
|
21
|
+
# see Grok#add_patterns_from_file
|
22
|
+
def add_patterns_from_file(path)
|
23
|
+
if !File.exists?(path)
|
24
|
+
raise "File does not exist: #{path}"
|
25
|
+
end
|
26
|
+
@pattern_files << path
|
27
|
+
end # def add_patterns_from_file
|
28
|
+
|
29
|
+
# see Grok#compile
|
30
|
+
def compile(pattern)
|
31
|
+
grok = Grok.new
|
32
|
+
@patterns.each do |name, value|
|
33
|
+
grok.add_pattern(name, value)
|
34
|
+
end
|
35
|
+
@pattern_files.each do |path|
|
36
|
+
grok.add_patterns_from_file(path)
|
37
|
+
end
|
38
|
+
grok.compile(pattern)
|
39
|
+
@groks << grok
|
40
|
+
end # def compile
|
41
|
+
|
42
|
+
# Slight difference from Grok#match in that it returns
|
43
|
+
# the Grok instance that matched successfully in addition
|
44
|
+
# to the GrokMatch result.
|
45
|
+
# See also: Grok#match
|
46
|
+
def match(string)
|
47
|
+
@groks.each do |grok|
|
48
|
+
match = grok.match(string)
|
49
|
+
if match
|
50
|
+
return [grok, match]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
return false
|
54
|
+
end # def match
|
55
|
+
end # class Pile
|
56
|
+
end # class Grok
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: jls-grok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.4.7
|
5
|
+
version: 0.5.2
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jordan Sissel
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-
|
14
|
+
date: 2011-08-21 00:00:00 -07:00
|
15
15
|
default_executable:
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirement: &id001 !ruby/object:Gem::Requirement
|
21
21
|
none: false
|
22
22
|
requirements:
|
23
|
-
- -
|
23
|
+
- - ~>
|
24
24
|
- !ruby/object:Gem::Version
|
25
25
|
version: 0.6.3
|
26
26
|
type: :runtime
|
@@ -36,32 +36,13 @@ extensions: []
|
|
36
36
|
extra_rdoc_files: []
|
37
37
|
|
38
38
|
files:
|
39
|
-
- INSTALL
|
40
|
-
- Rakefile
|
41
|
-
- examples/grok-web.rb
|
42
|
-
- examples/pattern-discovery.rb
|
43
|
-
- examples/test.rb
|
44
|
-
- grok.gemspec
|
39
|
+
- lib/Grok.rb
|
40
|
+
- lib/grok-pure.rb
|
45
41
|
- lib/grok.rb
|
46
|
-
- lib/grok/match.rb
|
47
|
-
- lib/grok/pile.rb
|
48
|
-
- test/Makefile
|
49
|
-
- test/alltests.rb
|
50
|
-
- test/general/basic_test.rb
|
51
|
-
- test/general/captures_test.rb
|
52
|
-
- test/patterns/day.rb
|
53
|
-
- test/patterns/host.rb
|
54
|
-
- test/patterns/ip.input
|
55
|
-
- test/patterns/ip.rb
|
56
|
-
- test/patterns/iso8601.rb
|
57
|
-
- test/patterns/month.rb
|
58
|
-
- test/patterns/number.rb
|
59
|
-
- test/patterns/path.rb
|
60
|
-
- test/patterns/prog.rb
|
61
|
-
- test/patterns/quotedstring.rb
|
62
|
-
- test/patterns/uri.rb
|
63
|
-
- test/run.sh
|
64
|
-
- test/speedtest.rb
|
42
|
+
- lib/grok/c-ext/pile.rb
|
43
|
+
- lib/grok/c-ext/match.rb
|
44
|
+
- lib/grok/pure/pile.rb
|
45
|
+
- lib/grok/pure/match.rb
|
65
46
|
has_rdoc: true
|
66
47
|
homepage: http://code.google.com/p/semicomplete/wiki/Grok
|
67
48
|
licenses: []
|
@@ -72,7 +53,6 @@ rdoc_options: []
|
|
72
53
|
require_paths:
|
73
54
|
- lib
|
74
55
|
- lib
|
75
|
-
- ext
|
76
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
77
57
|
none: false
|
78
58
|
requirements:
|
data/INSTALL
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
- You'll need grok installed.
|
2
|
-
From 'grok', do:
|
3
|
-
% make install
|
4
|
-
# This will install grok, libgrok, and grok's headers
|
5
|
-
|
6
|
-
- You'll need the 'Grok' ruby module installed.
|
7
|
-
From 'grok/ruby' do:
|
8
|
-
% ruby extconf.rb
|
9
|
-
% make install
|
10
|
-
|
11
|
-
# Test with:
|
12
|
-
% ruby -e 'require "Grok"; puts "Grok OK"'
|
data/Rakefile
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
task :default => [:package]
|
2
|
-
|
3
|
-
task :package do
|
4
|
-
system("make -C ext clean; rm ext/Makefile")
|
5
|
-
system("svn up")
|
6
|
-
system("gem build grok.gemspec")
|
7
|
-
end
|
8
|
-
|
9
|
-
task :publish do
|
10
|
-
latest_gem = %x{ls -t jls-grok*.gem}.split("\n").first
|
11
|
-
system("gem push #{latest_gem}")
|
12
|
-
end
|
data/examples/grok-web.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# Simple web application that will let you feed grok's discovery feature
|
4
|
-
# a bunch of data, and grok will show you patterns found and the results
|
5
|
-
# of that pattern as matched against the same input.
|
6
|
-
|
7
|
-
require 'rubygems'
|
8
|
-
require 'sinatra'
|
9
|
-
require 'grok'
|
10
|
-
|
11
|
-
get '/' do
|
12
|
-
redirect "/demo/grok-discover/index"
|
13
|
-
end
|
14
|
-
|
15
|
-
get "/demo/grok-discover/index" do
|
16
|
-
haml :index
|
17
|
-
end
|
18
|
-
|
19
|
-
post "/demo/grok-discover/grok" do
|
20
|
-
grok = Grok.new
|
21
|
-
grok.add_patterns_from_file("/usr/local/share/grok/patterns/base")
|
22
|
-
@results = []
|
23
|
-
params[:data].split("\n").each do |line|
|
24
|
-
pattern = grok.discover(line)
|
25
|
-
grok.compile(pattern)
|
26
|
-
match = grok.match(line)
|
27
|
-
puts "Got input: #{line}"
|
28
|
-
puts " => pattern: (#{match != false}) #{pattern}"
|
29
|
-
@results << {
|
30
|
-
:input => line,
|
31
|
-
:pattern => grok.pattern.gsub(/\\Q|\\E/, ""),
|
32
|
-
:full_pattern => grok.expanded_pattern,
|
33
|
-
:match => (match and match.captures or false),
|
34
|
-
}
|
35
|
-
end
|
36
|
-
haml :grok
|
37
|
-
end
|
38
|
-
|
39
|
-
get "/demo/grok-discover/style.css" do
|
40
|
-
sass :style
|
41
|
-
end
|
42
|
-
|
43
|
-
__END__
|
44
|
-
@@ style
|
45
|
-
h1
|
46
|
-
color: red
|
47
|
-
.original
|
48
|
-
.regexp
|
49
|
-
display: block
|
50
|
-
border: 1px solid grey
|
51
|
-
padding: 1em
|
52
|
-
|
53
|
-
.results
|
54
|
-
width: 80%
|
55
|
-
margin-left: auto
|
56
|
-
th
|
57
|
-
text-align: left
|
58
|
-
td
|
59
|
-
border-top: 1px solid black
|
60
|
-
@@ layout
|
61
|
-
%html
|
62
|
-
%head
|
63
|
-
%title Grok Web
|
64
|
-
%link{:rel => "stylesheet", :href => "/demo/grok-discover/style.css"}
|
65
|
-
%body
|
66
|
-
=yield
|
67
|
-
|
68
|
-
@@ index
|
69
|
-
#header
|
70
|
-
%h1 Grok Web
|
71
|
-
#content
|
72
|
-
Paste some log data below. I'll do my best to have grok generate a pattern for you.
|
73
|
-
|
74
|
-
%p
|
75
|
-
Learn more about grok here:
|
76
|
-
%a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} Grok
|
77
|
-
|
78
|
-
%p
|
79
|
-
This is running off of my cable modem for now, so if it's sluggish, that's
|
80
|
-
why. Be gentle.
|
81
|
-
%form{:action => "/demo/grok-discover/grok", :method => "post"}
|
82
|
-
%textarea{:name => "data", :rows => 10, :cols => 80}
|
83
|
-
%br
|
84
|
-
%input{:type => "submit", :value=>"submit"}
|
85
|
-
|
86
|
-
@@ grok
|
87
|
-
#header
|
88
|
-
%h1 Grok Results
|
89
|
-
%h3
|
90
|
-
%a{:href => "/demo/grok-discover/index"} Try more?
|
91
|
-
#content
|
92
|
-
%p
|
93
|
-
Below is grok's analysis of the data you provided. Each line is analyzed
|
94
|
-
separately. It uses grok's standard library of known patterns to give you a
|
95
|
-
pattern that grok can use to match more logs like the lines you provided.
|
96
|
-
%p
|
97
|
-
The results may not be perfect, but it gives you a head start on coming up with
|
98
|
-
log patterns for
|
99
|
-
%a{:href => "http://code.google.com/p/semicomplete/wiki/Grok"} grok
|
100
|
-
and
|
101
|
-
%a{:href => "http://code.google.com/p/logstash/"} logstash
|
102
|
-
%ol
|
103
|
-
- @results.each do |result|
|
104
|
-
%li
|
105
|
-
%p.original
|
106
|
-
%b Original:
|
107
|
-
%br= result[:input]
|
108
|
-
%p
|
109
|
-
%b Pattern:
|
110
|
-
%br
|
111
|
-
%span.pattern= result[:pattern]
|
112
|
-
%p
|
113
|
-
%b
|
114
|
-
Generated Regular Expression
|
115
|
-
%small
|
116
|
-
%i You could have written this by hand, be glad you didn't have to.
|
117
|
-
%code.regexp= result[:full_pattern].gsub("<", "&lt;")
|
118
|
-
%p
|
119
|
-
If you wanted to test this, you can paste the above expression into
|
120
|
-
pcretest(1) and it should match your input.
|
121
|
-
%p
|
122
|
-
%b Capture Results
|
123
|
-
%table.results
|
124
|
-
%tr
|
125
|
-
%th Name
|
126
|
-
%th Value
|
127
|
-
- result[:match].each do |key,val|
|
128
|
-
- val.each do |v|
|
129
|
-
%tr
|
130
|
-
%td= key
|
131
|
-
%td= v
|