jls-grok 0.10.12 → 0.10.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2f35ecc4902d02d2ea98af056466222dd36b33dc
4
+ data.tar.gz: 916b52edd7029e5711ecc5a5489e2fa642f1cc8d
5
+ SHA512:
6
+ metadata.gz: 404736a67e44293fd072047a6302bec733c72af889c2fb67a6c1cf8965a4f55c829c03ed5f0e6cd3e9545a77b9a21f6716f58aee8d7223ea9be4a90a8c620c04
7
+ data.tar.gz: 6680165222e03342034833cfe917c69d3fef8752894c716e277e0ae11b460f3f252c81445650612cb369d217e993cb5804597f6f5e0e862d390d3b48c1782c8b
@@ -1,7 +1,8 @@
1
1
  require "rubygems"
2
2
  require "logger"
3
- require "grok/pure/discovery"
4
3
  require "cabin"
4
+ require "grok/pure/discovery"
5
+ require "grok/pure/match"
5
6
 
6
7
  # TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
7
8
  class Grok
@@ -9,7 +10,7 @@ class Grok
9
10
 
10
11
  # The pattern input
11
12
  attr_accessor :pattern
12
-
13
+
13
14
  # The fully-expanded pattern (in regex form)
14
15
  attr_accessor :expanded_pattern
15
16
 
@@ -18,7 +19,7 @@ class Grok
18
19
 
19
20
  # The dictionary of pattern names to pattern expressions
20
21
  attr_accessor :patterns
21
-
22
+
22
23
  PATTERN_RE = \
23
24
  /%\{ # match '%{' not prefixed with '\'
24
25
  (?<name> # match the pattern name
@@ -50,6 +51,8 @@ class Grok
50
51
  @logger = Cabin::Channel.new
51
52
  @logger.subscribe(Logger.new(STDOUT))
52
53
  @logger.level = :warn
54
+ # Captures Lambda which is generated at Grok compile time and called at match time
55
+ @captures_func = nil
53
56
 
54
57
  # TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
55
58
  end # def initialize
@@ -66,7 +69,7 @@ class Grok
66
69
  file = File.new(path, "r")
67
70
  file.each do |line|
68
71
  # Skip comments
69
- next if line =~ /^\s*#/
72
+ next if line =~ /^\s*#/
70
73
  # File format is: NAME ' '+ PATTERN '\n'
71
74
  name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
72
75
  #p name => pattern
@@ -76,16 +79,15 @@ class Grok
76
79
  add_pattern(name, pattern.chomp)
77
80
  end
78
81
  return nil
82
+ ensure
83
+ file.close
79
84
  end # def add_patterns_from_file
80
85
 
81
86
  public
82
- def compile(pattern)
83
- @capture_map = {}
84
-
87
+ def compile(pattern, named_captures_only=false)
85
88
  iterations_left = 10000
86
89
  @pattern = pattern
87
90
  @expanded_pattern = pattern.clone
88
- index = 0
89
91
 
90
92
  # Replace any instances of '%{FOO}' with that pattern.
91
93
  loop do
@@ -101,61 +103,87 @@ class Grok
101
103
  end
102
104
 
103
105
  if @patterns.include?(m["pattern"])
104
- # create a named capture index that we can push later as the named
105
- # pattern. We do this because ruby regexp can't capture something
106
- # by the same name twice.
107
106
  regex = @patterns[m["pattern"]]
108
- #puts "patterns[#{m["pattern"]}] => #{regex}"
107
+ name = m["name"]
109
108
 
110
- capture = "a#{index}" # named captures have to start with letters?
111
- #capture = "%04d" % "#{index}" # named captures have to start with letters?
112
- replacement_pattern = "(?<#{capture}>#{regex})"
113
- @capture_map[capture] = m["name"]
114
-
115
- #puts "Before: #{@expanded_pattern}"
116
- #puts "m[0]: #{m[0]}"
117
- #puts "replacement_pattern => #{replacement_pattern}"
118
- #puts "Proposed: #{@expanded_pattern.sub(m[0], replacement_pattern)}"
109
+ if named_captures_only && name.index(":").nil?
110
+ # this has no semantic (pattern:foo) so we don't need to capture
111
+ replacement_pattern = "(?:#{regex})"
112
+ else
113
+ replacement_pattern = "(?<#{name}>#{regex})"
114
+ end
119
115
 
120
116
  # Ruby's String#sub() has a bug (or misfeature) that causes it to do bad
121
117
  # things to backslashes in string replacements, so let's work around it
122
118
  # See this gist for more details: https://gist.github.com/1491437
123
119
  # This hack should resolve LOGSTASH-226.
124
120
  @expanded_pattern.sub!(m[0]) { |s| replacement_pattern }
125
-
126
- #puts "After: #{@expanded_pattern}"
127
- #puts "m[0]: #{m[0]}"
128
- #puts "replacement_pattern => #{replacement_pattern}"
129
- index += 1
121
+ @logger.debug? and @logger.debug("replacement_pattern => #{replacement_pattern}")
130
122
  else
131
123
  raise PatternError, "pattern #{m[0]} not defined"
132
124
  end
133
125
  end
134
126
 
135
- #@logger.debug("Finished expanding", :string => @expanded_pattern)
136
- #puts "Expanded: #{@expanded_pattern}"
137
- @regexp = Regexp.new(@expanded_pattern)
138
- @logger.debug("Grok compiled OK", :pattern => pattern,
139
- :expanded_pattern => @expanded_pattern)
140
- end # def compile
127
+ @regexp = Regexp.new(@expanded_pattern, Regexp::MULTILINE)
128
+ @logger.debug? and @logger.debug("Grok compiled OK", :pattern => pattern,
129
+ :expanded_pattern => @expanded_pattern)
130
+
131
+ @captures_func = compile_captures_func(@regexp)
132
+ end
133
+
134
+ private
135
+ # compiles the captures lambda so runtime match can be optimized
136
+ def compile_captures_func(re)
137
+ re_match = ["lambda do |match, &block|"]
138
+ re.named_captures.each do |name, indices|
139
+ pattern, name, coerce = name.split(":")
140
+ indices.each do |index|
141
+ coerce = case coerce
142
+ when "int"; ".to_i"
143
+ when "float"; ".to_f"
144
+ else; ""
145
+ end
146
+ name = pattern if name.nil?
147
+ re_match << " block.call(#{name.inspect}, match[#{index}]#{coerce})"
148
+ end
149
+ end
150
+ re_match << "end"
151
+ return eval(re_match.join("\n"))
152
+ end # def compile_captures_func
141
153
 
142
154
  public
143
155
  def match(text)
144
156
  match = @regexp.match(text)
145
-
146
157
  if match
147
158
  grokmatch = Grok::Match.new
148
159
  grokmatch.subject = text
149
- grokmatch.start, grokmatch.end = match.offset(0)
150
160
  grokmatch.grok = self
151
161
  grokmatch.match = match
152
- @logger.debug("Regexp match object", :names => match.names, :captures => match.captures)
162
+ @logger.debug? and @logger.debug("Regexp match object", :names => match.names,
163
+ :captures => match.captures)
153
164
  return grokmatch
154
165
  else
155
166
  return false
156
167
  end
157
168
  end # def match
158
169
 
170
+ # Optimized match and capture instead of calling them separately
171
+ def match_and_capture(text)
172
+ match = @regexp.match(text)
173
+ if match
174
+ @logger.debug? and @logger.debug("Regexp match object", :names => match.names,
175
+ :captures => match.captures)
176
+ @captures_func.call(match) { |k,v| yield k,v }
177
+ return true
178
+ else
179
+ return false
180
+ end
181
+ end # def match_and_capture
182
+
183
+ def capture(match, block)
184
+ @captures_func.call(match) { |k,v| block.call k,v }
185
+ end # def capture
186
+
159
187
  public
160
188
  def discover(input)
161
189
  init_discover if @discover == nil
@@ -170,11 +198,4 @@ class Grok
170
198
  @discover.logger = @logger
171
199
  end # def init_discover
172
200
 
173
- public
174
- def capture_name(id)
175
- return @capture_map[id]
176
- end # def capture_name
177
201
  end # Grok
178
-
179
- require "grok/pure/match"
180
- require "grok/pure/pile"
@@ -2,8 +2,6 @@ require "grok-pure"
2
2
 
3
3
  class Grok::Match
4
4
  attr_accessor :subject
5
- attr_accessor :start
6
- attr_accessor :end
7
5
  attr_accessor :grok
8
6
  attr_accessor :match
9
7
 
@@ -13,16 +11,8 @@ class Grok::Match
13
11
  end
14
12
 
15
13
  public
16
- def each_capture
17
- @captures = Hash.new { |h, k| h[k] = Array.new }
18
-
19
- #p :expanded => @grok.expanded_pattern
20
- #p :map => @grok.capture_map
21
- @match.names.zip(@match.captures).each do |id, value|
22
- name = @grok.capture_name(id) || "_:#{id}"
23
- yield name, value
24
- end
25
-
14
+ def each_capture(&block)
15
+ @grok.capture(@match, block)
26
16
  end # def each_capture
27
17
 
28
18
  public
metadata CHANGED
@@ -1,33 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jls-grok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.12
5
- prerelease:
4
+ version: 0.10.13
6
5
  platform: ruby
7
6
  authors:
8
7
  - Jordan Sissel
9
8
  - Pete Fritchman
10
- autorequire:
9
+ autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2013-10-07 00:00:00.000000000 Z
12
+ date: 2015-02-09 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: cabin
17
- requirement: !ruby/object:Gem::Requirement
18
- none: false
16
+ version_requirements: !ruby/object:Gem::Requirement
19
17
  requirements:
20
- - - ! '>='
18
+ - - '>='
21
19
  - !ruby/object:Gem::Version
22
20
  version: 0.6.0
23
- type: :runtime
24
- prerelease: false
25
- version_requirements: !ruby/object:Gem::Requirement
26
- none: false
21
+ requirement: !ruby/object:Gem::Requirement
27
22
  requirements:
28
- - - ! '>='
23
+ - - '>='
29
24
  - !ruby/object:Gem::Version
30
25
  version: 0.6.0
26
+ prerelease: false
27
+ type: :runtime
31
28
  description: Grok ruby bindings - pattern match/extraction tool
32
29
  email:
33
30
  - jls@semicomplete.com
@@ -36,37 +33,35 @@ executables: []
36
33
  extensions: []
37
34
  extra_rdoc_files: []
38
35
  files:
36
+ - lib/grok-pure.rb
39
37
  - lib/grok.rb
38
+ - lib/grok/c-ext/match.rb
39
+ - lib/grok/c-ext/pile.rb
40
40
  - lib/grok/namespace.rb
41
41
  - lib/grok/pure/discovery.rb
42
- - lib/grok/pure/pile.rb
43
42
  - lib/grok/pure/match.rb
44
- - lib/grok/c-ext/pile.rb
45
- - lib/grok/c-ext/match.rb
46
- - lib/grok-pure.rb
47
43
  homepage: http://code.google.com/p/semicomplete/wiki/Grok
48
44
  licenses: []
49
- post_install_message:
45
+ metadata: {}
46
+ post_install_message:
50
47
  rdoc_options: []
51
48
  require_paths:
52
49
  - lib
53
50
  - lib
54
51
  required_ruby_version: !ruby/object:Gem::Requirement
55
- none: false
56
52
  requirements:
57
- - - ! '>='
53
+ - - '>='
58
54
  - !ruby/object:Gem::Version
59
55
  version: '0'
60
56
  required_rubygems_version: !ruby/object:Gem::Requirement
61
- none: false
62
57
  requirements:
63
- - - ! '>='
58
+ - - '>='
64
59
  - !ruby/object:Gem::Version
65
60
  version: '0'
66
61
  requirements: []
67
- rubyforge_project:
68
- rubygems_version: 1.8.25
69
- signing_key:
70
- specification_version: 3
62
+ rubyforge_project:
63
+ rubygems_version: 2.4.5
64
+ signing_key:
65
+ specification_version: 4
71
66
  summary: grok bindings for ruby
72
67
  test_files: []
@@ -1,71 +0,0 @@
1
- require "grok-pure"
2
- require "logger"
3
- require "cabin"
4
-
5
- # A grok pile is an easy way to have multiple patterns together so
6
- # that you can try to match against each one.
7
- # The API provided should be similar to the normal Grok
8
- # interface, but you can compile multiple patterns and match will
9
- # try each one until a match is found.
10
- class Grok
11
- class Pile
12
- attr_accessor :logger
13
-
14
- def initialize
15
- @groks = []
16
- @patterns = {}
17
- @pattern_files = []
18
- @logger = Cabin::Channel.new
19
- @logger.subscribe(Logger.new(STDOUT))
20
- @logger.level = :warn
21
- end # def initialize
22
-
23
- def logger=(logger)
24
- @logger = logger
25
- @groks.each { |g| g.logger = logger }
26
- end # def logger=
27
-
28
- # see Grok#add_pattern
29
- def add_pattern(name, string)
30
- @patterns[name] = string
31
- end # def add_pattern
32
-
33
- # see Grok#add_patterns_from_file
34
- def add_patterns_from_file(path)
35
- if !File.exists?(path)
36
- raise "File does not exist: #{path}"
37
- end
38
- @pattern_files << path
39
- end # def add_patterns_from_file
40
-
41
- # see Grok#compile
42
- def compile(pattern)
43
- grok = Grok.new
44
- grok.logger = @logger unless @logger.nil?
45
- @patterns.each do |name, value|
46
- grok.add_pattern(name, value)
47
- end
48
- @pattern_files.each do |path|
49
- grok.add_patterns_from_file(path)
50
- end
51
- grok.compile(pattern)
52
- @logger.info("Pile compiled new grok", :pattern => pattern,
53
- :expanded_pattern => grok.expanded_pattern)
54
- @groks << grok
55
- end # def compile
56
-
57
- # Slight difference from Grok#match in that it returns
58
- # the Grok instance that matched successfully in addition
59
- # to the GrokMatch result.
60
- # See also: Grok#match
61
- def match(string)
62
- @groks.each do |grok|
63
- match = grok.match(string)
64
- if match
65
- return [grok, match]
66
- end
67
- end
68
- return false
69
- end # def match
70
- end # class Pile
71
- end # class Grok