jls-grok 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,9 +4,17 @@ require "cabin"
4
4
 
5
5
  # TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
6
6
  class Grok
7
+ # The pattern input
7
8
  attr_accessor :pattern
9
+
10
+ # The fully-expanded pattern (in regex form)
8
11
  attr_accessor :expanded_pattern
12
+
13
+ # The logger
9
14
  attr_accessor :logger
15
+
16
+ # The dictionary of pattern names to pattern expressions
17
+ attr_accessor :patterns
10
18
 
11
19
  PATTERN_RE = \
12
20
  /%\{ # match '%{' not prefixed with '\'
@@ -58,6 +66,7 @@ class Grok
58
66
  next if line =~ /^\s*#/
59
67
  # File format is: NAME ' '+ PATTERN '\n'
60
68
  name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
69
+ #p name => pattern
61
70
  # If the line is malformed, skip it.
62
71
  next if pattern.nil?
63
72
  # Trim newline and add the pattern.
@@ -70,9 +79,9 @@ class Grok
70
79
  def compile(pattern)
71
80
  @capture_map = {}
72
81
 
73
- iterations_left = 1000
82
+ iterations_left = 10000
74
83
  @pattern = pattern
75
- @expanded_pattern = pattern
84
+ @expanded_pattern = pattern.clone
76
85
  index = 0
77
86
 
78
87
  # Replace any instances of '%{FOO}' with that pattern.
@@ -151,8 +160,9 @@ class Grok
151
160
 
152
161
  private
153
162
  def init_discover
154
- @discover = GrokDiscover.new(self)
155
- @discover.logmask = logmask
163
+ require "grok/pure/discovery"
164
+ @discover = Grok::Discovery.new(self)
165
+ @discover.logger = @logger
156
166
  end # def init_discover
157
167
 
158
168
  public
@@ -0,0 +1,78 @@
1
+ require "grok-pure"
2
+ require "logger"
3
+
4
class Grok
  # Rewrites a sample line of text into a grok expression by replacing
  # the parts that known patterns match with "%{NAME}" references.
  #
  # The candidate patterns come from the parent Grok instance handed to
  # the constructor (its #patterns dictionary of name => expression).
  class Discovery
    # Patterns whose expanded regex scores below this are skipped as too
    # generic to be useful (SPACE, NOTSPACE, DATA, etc).
    MINIMUM_COMPLEXITY = 20

    # Matches a "%{NAME}" reference that an earlier substitution has
    # already placed in the text.
    PATTERN_REFERENCE = /%\{[^}]+\}/

    # The logger
    attr_accessor :logger

    # grok - the parent Grok whose #patterns dictionary is searched.
    #
    # Sets up a Cabin channel at :warn level that writes to STDOUT;
    # callers may replace it through #logger=.
    def initialize(grok)
      @grok = grok
      @logger = Cabin::Channel.new
      @logger.subscribe(Logger.new(STDOUT))
      @logger.level = :warn
    end # def initialize

    # Replace recognizable parts of 'text' with "%{NAME}" references.
    #
    # text - the sample String to analyze. It is never modified; a copy is
    #        edited and returned.
    #
    # Candidates are tried most-complex first. After every substitution the
    # scan restarts from the most complex candidate, and it stops once a
    # full pass makes no substitution.
    #
    # Returns a new String with the discovered references substituted in.
    def discover(text)
      # dup (not clone): clone would keep a frozen input frozen and the
      # in-place substitution below would raise.
      text = text.dup
      candidates = candidate_groks

      loop do
        break unless substitute_first!(text, candidates)
      end

      text
    end # def discover

    private

    # Compile every parent pattern in a Grok of its own and return the
    # useful ones as [name, grok] pairs, most complex first.
    #
    # TODO(sissel): Sort patterns by complexity, most complex first.
    def candidate_groks
      groks = {}
      @grok.patterns.each_key do |name|
        grok = Grok.new
        # Copy in the same grok patterns from the parent
        grok.patterns.merge!(@grok.patterns)
        grok.compile("%{#{name}}")
        groks[name] = grok
      end

      ordered = groks.sort { |a, b| compare(a, b) }
      # The complexity of a compiled pattern never changes, so filter once
      # here instead of on every pass over the text.
      ordered.reject do |_name, grok|
        complexity(grok.expanded_pattern) < MINIMUM_COMPLEXITY
      end
    end # def candidate_groks

    # Apply the first candidate that yields an acceptable match, editing
    # 'text' in place.
    #
    # The match object returned by Grok#match is expected to respond to
    # #start and #end (offsets into the text).
    #
    # Returns true if a substitution was made, false otherwise.
    def substitute_first!(text, candidates)
      candidates.each do |name, grok|
        m = grok.match(text)
        # Skip non-matches
        next unless m

        span = m.start...m.end
        part = text[span]
        # Only include things that have word boundaries (not just words)
        next if part !~ /.\b./
        # Skip over parts that appear to include %{pattern} already
        next if part =~ PATTERN_REFERENCE

        text[span] = "%{#{name}}"
        return true
      end

      false
    end # def substitute_first!

    # a and b are each: [ name, grok ]
    # Sorts highest complexity first.
    def compare(a, b)
      complexity(b.last.expanded_pattern) <=> complexity(a.last.expanded_pattern)
    end # def compare

    # Score an expanded pattern: the number of branches ("|") plus the
    # length of the pattern.
    def complexity(expression)
      expression.count("|") + expression.length
    end # def complexity
  end # class Discovery
end # class Grok
71
+
72
+ #/* Compute the relative complexity of a pattern */
73
+ #static int complexity(const grok_t *grok) {
74
+ #int score;
75
+ #score += string_count(grok->full_pattern, "|");
76
+ #score += strlen(grok->full_pattern) / 2;
77
+ #return -score; /* Sort most-complex first */
78
+ #}
metadata CHANGED
@@ -1,90 +1,66 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: jls-grok
3
- version: !ruby/object:Gem::Version
4
- hash: 55
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.10.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 9
9
- - 6
10
- version: 0.9.6
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Jordan Sissel
14
9
  - Pete Fritchman
15
10
  autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
-
19
- date: 2012-01-17 00:00:00 Z
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2012-02-03 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: cabin
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &20862820 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
27
- - - "="
28
- - !ruby/object:Gem::Version
29
- hash: 21
30
- segments:
31
- - 0
32
- - 1
33
- - 7
19
+ requirements:
20
+ - - =
21
+ - !ruby/object:Gem::Version
34
22
  version: 0.1.7
35
23
  type: :runtime
36
- version_requirements: *id001
24
+ prerelease: false
25
+ version_requirements: *20862820
37
26
  description: Grok ruby bindings - pattern match/extraction tool
38
- email:
27
+ email:
39
28
  - jls@semicomplete.com
40
29
  - petef@databits.net
41
30
  executables: []
42
-
43
31
  extensions: []
44
-
45
32
  extra_rdoc_files: []
46
-
47
- files:
33
+ files:
48
34
  - lib/grok-pure.rb
49
35
  - lib/grok.rb
50
36
  - lib/grok/c-ext/pile.rb
51
37
  - lib/grok/c-ext/match.rb
38
+ - lib/grok/pure/discovery.rb
52
39
  - lib/grok/pure/pile.rb
53
40
  - lib/grok/pure/match.rb
54
- - lib/Grok.rb
55
41
  homepage: http://code.google.com/p/semicomplete/wiki/Grok
56
42
  licenses: []
57
-
58
43
  post_install_message:
59
44
  rdoc_options: []
60
-
61
- require_paths:
45
+ require_paths:
62
46
  - lib
63
47
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 3
70
- segments:
71
- - 0
72
- version: "0"
73
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
55
  none: false
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- hash: 3
79
- segments:
80
- - 0
81
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
82
60
  requirements: []
83
-
84
61
  rubyforge_project:
85
- rubygems_version: 1.8.13
62
+ rubygems_version: 1.8.10
86
63
  signing_key:
87
64
  specification_version: 3
88
65
  summary: grok bindings for ruby
89
66
  test_files: []
90
-
@@ -1,3 +0,0 @@
1
- require "grok.rb"
2
-
3
- # compat for when grok was Grok.so