jls-grok 0.10.12 → 0.10.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/grok-pure.rb +63 -42
- data/lib/grok/pure/match.rb +2 -12
- metadata +20 -25
- data/lib/grok/pure/pile.rb +0 -71
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2f35ecc4902d02d2ea98af056466222dd36b33dc
|
4
|
+
data.tar.gz: 916b52edd7029e5711ecc5a5489e2fa642f1cc8d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 404736a67e44293fd072047a6302bec733c72af889c2fb67a6c1cf8965a4f55c829c03ed5f0e6cd3e9545a77b9a21f6716f58aee8d7223ea9be4a90a8c620c04
|
7
|
+
data.tar.gz: 6680165222e03342034833cfe917c69d3fef8752894c716e277e0ae11b460f3f252c81445650612cb369d217e993cb5804597f6f5e0e862d390d3b48c1782c8b
|
data/lib/grok-pure.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require "rubygems"
|
2
2
|
require "logger"
|
3
|
-
require "grok/pure/discovery"
|
4
3
|
require "cabin"
|
4
|
+
require "grok/pure/discovery"
|
5
|
+
require "grok/pure/match"
|
5
6
|
|
6
7
|
# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
|
7
8
|
class Grok
|
@@ -9,7 +10,7 @@ class Grok
|
|
9
10
|
|
10
11
|
# The pattern input
|
11
12
|
attr_accessor :pattern
|
12
|
-
|
13
|
+
|
13
14
|
# The fully-expanded pattern (in regex form)
|
14
15
|
attr_accessor :expanded_pattern
|
15
16
|
|
@@ -18,7 +19,7 @@ class Grok
|
|
18
19
|
|
19
20
|
# The dictionary of pattern names to pattern expressions
|
20
21
|
attr_accessor :patterns
|
21
|
-
|
22
|
+
|
22
23
|
PATTERN_RE = \
|
23
24
|
/%\{ # match '%{' not prefixed with '\'
|
24
25
|
(?<name> # match the pattern name
|
@@ -50,6 +51,8 @@ class Grok
|
|
50
51
|
@logger = Cabin::Channel.new
|
51
52
|
@logger.subscribe(Logger.new(STDOUT))
|
52
53
|
@logger.level = :warn
|
54
|
+
# Captures Lambda which is generated at Grok compile time and called at match time
|
55
|
+
@captures_func = nil
|
53
56
|
|
54
57
|
# TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
|
55
58
|
end # def initialize
|
@@ -66,7 +69,7 @@ class Grok
|
|
66
69
|
file = File.new(path, "r")
|
67
70
|
file.each do |line|
|
68
71
|
# Skip comments
|
69
|
-
next if line =~ /^\s*#/
|
72
|
+
next if line =~ /^\s*#/
|
70
73
|
# File format is: NAME ' '+ PATTERN '\n'
|
71
74
|
name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
|
72
75
|
#p name => pattern
|
@@ -76,16 +79,15 @@ class Grok
|
|
76
79
|
add_pattern(name, pattern.chomp)
|
77
80
|
end
|
78
81
|
return nil
|
82
|
+
ensure
|
83
|
+
file.close
|
79
84
|
end # def add_patterns_from_file
|
80
85
|
|
81
86
|
public
|
82
|
-
def compile(pattern)
|
83
|
-
@capture_map = {}
|
84
|
-
|
87
|
+
def compile(pattern, named_captures_only=false)
|
85
88
|
iterations_left = 10000
|
86
89
|
@pattern = pattern
|
87
90
|
@expanded_pattern = pattern.clone
|
88
|
-
index = 0
|
89
91
|
|
90
92
|
# Replace any instances of '%{FOO}' with that pattern.
|
91
93
|
loop do
|
@@ -101,61 +103,87 @@ class Grok
|
|
101
103
|
end
|
102
104
|
|
103
105
|
if @patterns.include?(m["pattern"])
|
104
|
-
# create a named capture index that we can push later as the named
|
105
|
-
# pattern. We do this because ruby regexp can't capture something
|
106
|
-
# by the same name twice.
|
107
106
|
regex = @patterns[m["pattern"]]
|
108
|
-
|
107
|
+
name = m["name"]
|
109
108
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
#puts "m[0]: #{m[0]}"
|
117
|
-
#puts "replacement_pattern => #{replacement_pattern}"
|
118
|
-
#puts "Proposed: #{@expanded_pattern.sub(m[0], replacement_pattern)}"
|
109
|
+
if named_captures_only && name.index(":").nil?
|
110
|
+
# this has no semantic (pattern:foo) so we don't need to capture
|
111
|
+
replacement_pattern = "(?:#{regex})"
|
112
|
+
else
|
113
|
+
replacement_pattern = "(?<#{name}>#{regex})"
|
114
|
+
end
|
119
115
|
|
120
116
|
# Ruby's String#sub() has a bug (or misfeature) that causes it to do bad
|
121
117
|
# things to backslashes in string replacements, so let's work around it
|
122
118
|
# See this gist for more details: https://gist.github.com/1491437
|
123
119
|
# This hack should resolve LOGSTASH-226.
|
124
120
|
@expanded_pattern.sub!(m[0]) { |s| replacement_pattern }
|
125
|
-
|
126
|
-
#puts "After: #{@expanded_pattern}"
|
127
|
-
#puts "m[0]: #{m[0]}"
|
128
|
-
#puts "replacement_pattern => #{replacement_pattern}"
|
129
|
-
index += 1
|
121
|
+
@logger.debug? and @logger.debug("replacement_pattern => #{replacement_pattern}")
|
130
122
|
else
|
131
123
|
raise PatternError, "pattern #{m[0]} not defined"
|
132
124
|
end
|
133
125
|
end
|
134
126
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
end
|
127
|
+
@regexp = Regexp.new(@expanded_pattern, Regexp::MULTILINE)
|
128
|
+
@logger.debug? and @logger.debug("Grok compiled OK", :pattern => pattern,
|
129
|
+
:expanded_pattern => @expanded_pattern)
|
130
|
+
|
131
|
+
@captures_func = compile_captures_func(@regexp)
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
# compiles the captures lambda so runtime match can be optimized
|
136
|
+
def compile_captures_func(re)
|
137
|
+
re_match = ["lambda do |match, &block|"]
|
138
|
+
re.named_captures.each do |name, indices|
|
139
|
+
pattern, name, coerce = name.split(":")
|
140
|
+
indices.each do |index|
|
141
|
+
coerce = case coerce
|
142
|
+
when "int"; ".to_i"
|
143
|
+
when "float"; ".to_f"
|
144
|
+
else; ""
|
145
|
+
end
|
146
|
+
name = pattern if name.nil?
|
147
|
+
re_match << " block.call(#{name.inspect}, match[#{index}]#{coerce})"
|
148
|
+
end
|
149
|
+
end
|
150
|
+
re_match << "end"
|
151
|
+
return eval(re_match.join("\n"))
|
152
|
+
end # def compile_captures_func
|
141
153
|
|
142
154
|
public
|
143
155
|
def match(text)
|
144
156
|
match = @regexp.match(text)
|
145
|
-
|
146
157
|
if match
|
147
158
|
grokmatch = Grok::Match.new
|
148
159
|
grokmatch.subject = text
|
149
|
-
grokmatch.start, grokmatch.end = match.offset(0)
|
150
160
|
grokmatch.grok = self
|
151
161
|
grokmatch.match = match
|
152
|
-
@logger.debug("Regexp match object", :names => match.names,
|
162
|
+
@logger.debug? and @logger.debug("Regexp match object", :names => match.names,
|
163
|
+
:captures => match.captures)
|
153
164
|
return grokmatch
|
154
165
|
else
|
155
166
|
return false
|
156
167
|
end
|
157
168
|
end # def match
|
158
169
|
|
170
|
+
# Optimized match and capture instead of calling them separately
|
171
|
+
def match_and_capture(text)
|
172
|
+
match = @regexp.match(text)
|
173
|
+
if match
|
174
|
+
@logger.debug? and @logger.debug("Regexp match object", :names => match.names,
|
175
|
+
:captures => match.captures)
|
176
|
+
@captures_func.call(match) { |k,v| yield k,v }
|
177
|
+
return true
|
178
|
+
else
|
179
|
+
return false
|
180
|
+
end
|
181
|
+
end # def match_and_capture
|
182
|
+
|
183
|
+
def capture(match, block)
|
184
|
+
@captures_func.call(match) { |k,v| block.call k,v }
|
185
|
+
end # def capture
|
186
|
+
|
159
187
|
public
|
160
188
|
def discover(input)
|
161
189
|
init_discover if @discover == nil
|
@@ -170,11 +198,4 @@ class Grok
|
|
170
198
|
@discover.logger = @logger
|
171
199
|
end # def init_discover
|
172
200
|
|
173
|
-
public
|
174
|
-
def capture_name(id)
|
175
|
-
return @capture_map[id]
|
176
|
-
end # def capture_name
|
177
201
|
end # Grok
|
178
|
-
|
179
|
-
require "grok/pure/match"
|
180
|
-
require "grok/pure/pile"
|
data/lib/grok/pure/match.rb
CHANGED
@@ -2,8 +2,6 @@ require "grok-pure"
|
|
2
2
|
|
3
3
|
class Grok::Match
|
4
4
|
attr_accessor :subject
|
5
|
-
attr_accessor :start
|
6
|
-
attr_accessor :end
|
7
5
|
attr_accessor :grok
|
8
6
|
attr_accessor :match
|
9
7
|
|
@@ -13,16 +11,8 @@ class Grok::Match
|
|
13
11
|
end
|
14
12
|
|
15
13
|
public
|
16
|
-
def each_capture
|
17
|
-
@
|
18
|
-
|
19
|
-
#p :expanded => @grok.expanded_pattern
|
20
|
-
#p :map => @grok.capture_map
|
21
|
-
@match.names.zip(@match.captures).each do |id, value|
|
22
|
-
name = @grok.capture_name(id) || "_:#{id}"
|
23
|
-
yield name, value
|
24
|
-
end
|
25
|
-
|
14
|
+
def each_capture(&block)
|
15
|
+
@grok.capture(@match, block)
|
26
16
|
end # def each_capture
|
27
17
|
|
28
18
|
public
|
metadata
CHANGED
@@ -1,33 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jls-grok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.12
|
5
|
-
prerelease:
|
4
|
+
version: 0.10.13
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jordan Sissel
|
9
8
|
- Pete Fritchman
|
10
|
-
autorequire:
|
9
|
+
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: cabin
|
17
|
-
|
18
|
-
none: false
|
16
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - '>='
|
21
19
|
- !ruby/object:Gem::Version
|
22
20
|
version: 0.6.0
|
23
|
-
|
24
|
-
prerelease: false
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
21
|
+
requirement: !ruby/object:Gem::Requirement
|
27
22
|
requirements:
|
28
|
-
- -
|
23
|
+
- - '>='
|
29
24
|
- !ruby/object:Gem::Version
|
30
25
|
version: 0.6.0
|
26
|
+
prerelease: false
|
27
|
+
type: :runtime
|
31
28
|
description: Grok ruby bindings - pattern match/extraction tool
|
32
29
|
email:
|
33
30
|
- jls@semicomplete.com
|
@@ -36,37 +33,35 @@ executables: []
|
|
36
33
|
extensions: []
|
37
34
|
extra_rdoc_files: []
|
38
35
|
files:
|
36
|
+
- lib/grok-pure.rb
|
39
37
|
- lib/grok.rb
|
38
|
+
- lib/grok/c-ext/match.rb
|
39
|
+
- lib/grok/c-ext/pile.rb
|
40
40
|
- lib/grok/namespace.rb
|
41
41
|
- lib/grok/pure/discovery.rb
|
42
|
-
- lib/grok/pure/pile.rb
|
43
42
|
- lib/grok/pure/match.rb
|
44
|
-
- lib/grok/c-ext/pile.rb
|
45
|
-
- lib/grok/c-ext/match.rb
|
46
|
-
- lib/grok-pure.rb
|
47
43
|
homepage: http://code.google.com/p/semicomplete/wiki/Grok
|
48
44
|
licenses: []
|
49
|
-
|
45
|
+
metadata: {}
|
46
|
+
post_install_message:
|
50
47
|
rdoc_options: []
|
51
48
|
require_paths:
|
52
49
|
- lib
|
53
50
|
- lib
|
54
51
|
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
-
none: false
|
56
52
|
requirements:
|
57
|
-
- -
|
53
|
+
- - '>='
|
58
54
|
- !ruby/object:Gem::Version
|
59
55
|
version: '0'
|
60
56
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
-
none: false
|
62
57
|
requirements:
|
63
|
-
- -
|
58
|
+
- - '>='
|
64
59
|
- !ruby/object:Gem::Version
|
65
60
|
version: '0'
|
66
61
|
requirements: []
|
67
|
-
rubyforge_project:
|
68
|
-
rubygems_version:
|
69
|
-
signing_key:
|
70
|
-
specification_version:
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.4.5
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
71
66
|
summary: grok bindings for ruby
|
72
67
|
test_files: []
|
data/lib/grok/pure/pile.rb
DELETED
@@ -1,71 +0,0 @@
|
|
1
|
-
require "grok-pure"
|
2
|
-
require "logger"
|
3
|
-
require "cabin"
|
4
|
-
|
5
|
-
# A grok pile is an easy way to have multiple patterns together so
|
6
|
-
# that you can try to match against each one.
|
7
|
-
# The API provided should be similar to the normal Grok
|
8
|
-
# interface, but you can compile multiple patterns and match will
|
9
|
-
# try each one until a match is found.
|
10
|
-
class Grok
|
11
|
-
class Pile
|
12
|
-
attr_accessor :logger
|
13
|
-
|
14
|
-
def initialize
|
15
|
-
@groks = []
|
16
|
-
@patterns = {}
|
17
|
-
@pattern_files = []
|
18
|
-
@logger = Cabin::Channel.new
|
19
|
-
@logger.subscribe(Logger.new(STDOUT))
|
20
|
-
@logger.level = :warn
|
21
|
-
end # def initialize
|
22
|
-
|
23
|
-
def logger=(logger)
|
24
|
-
@logger = logger
|
25
|
-
@groks.each { |g| g.logger = logger }
|
26
|
-
end # def logger=
|
27
|
-
|
28
|
-
# see Grok#add_pattern
|
29
|
-
def add_pattern(name, string)
|
30
|
-
@patterns[name] = string
|
31
|
-
end # def add_pattern
|
32
|
-
|
33
|
-
# see Grok#add_patterns_from_file
|
34
|
-
def add_patterns_from_file(path)
|
35
|
-
if !File.exists?(path)
|
36
|
-
raise "File does not exist: #{path}"
|
37
|
-
end
|
38
|
-
@pattern_files << path
|
39
|
-
end # def add_patterns_from_file
|
40
|
-
|
41
|
-
# see Grok#compile
|
42
|
-
def compile(pattern)
|
43
|
-
grok = Grok.new
|
44
|
-
grok.logger = @logger unless @logger.nil?
|
45
|
-
@patterns.each do |name, value|
|
46
|
-
grok.add_pattern(name, value)
|
47
|
-
end
|
48
|
-
@pattern_files.each do |path|
|
49
|
-
grok.add_patterns_from_file(path)
|
50
|
-
end
|
51
|
-
grok.compile(pattern)
|
52
|
-
@logger.info("Pile compiled new grok", :pattern => pattern,
|
53
|
-
:expanded_pattern => grok.expanded_pattern)
|
54
|
-
@groks << grok
|
55
|
-
end # def compile
|
56
|
-
|
57
|
-
# Slight difference from Grok#match in that it returns
|
58
|
-
# the Grok instance that matched successfully in addition
|
59
|
-
# to the GrokMatch result.
|
60
|
-
# See also: Grok#match
|
61
|
-
def match(string)
|
62
|
-
@groks.each do |grok|
|
63
|
-
match = grok.match(string)
|
64
|
-
if match
|
65
|
-
return [grok, match]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
return false
|
69
|
-
end # def match
|
70
|
-
end # class Pile
|
71
|
-
end # class Grok
|