jls-grok 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,9 +4,17 @@ require "cabin"
4
4
 
5
5
  # TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
6
6
  class Grok
7
+ # The pattern input
7
8
  attr_accessor :pattern
9
+
10
+ # The fully-expanded pattern (in regex form)
8
11
  attr_accessor :expanded_pattern
12
+
13
+ # The logger
9
14
  attr_accessor :logger
15
+
16
+ # The dictionary of pattern names to pattern expressions
17
+ attr_accessor :patterns
10
18
 
11
19
  PATTERN_RE = \
12
20
  /%\{ # match '%{' not prefixed with '\'
@@ -58,6 +66,7 @@ class Grok
58
66
  next if line =~ /^\s*#/
59
67
  # File format is: NAME ' '+ PATTERN '\n'
60
68
  name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
69
+ #p name => pattern
61
70
  # If the line is malformed, skip it.
62
71
  next if pattern.nil?
63
72
  # Trim newline and add the pattern.
@@ -70,9 +79,9 @@ class Grok
70
79
  def compile(pattern)
71
80
  @capture_map = {}
72
81
 
73
- iterations_left = 1000
82
+ iterations_left = 10000
74
83
  @pattern = pattern
75
- @expanded_pattern = pattern
84
+ @expanded_pattern = pattern.clone
76
85
  index = 0
77
86
 
78
87
  # Replace any instances of '%{FOO}' with that pattern.
@@ -151,8 +160,9 @@ class Grok
151
160
 
152
161
  private
153
162
  def init_discover
154
- @discover = GrokDiscover.new(self)
155
- @discover.logmask = logmask
163
+ require "grok/pure/discovery"
164
+ @discover = Grok::Discovery.new(self)
165
+ @discover.logger = @logger
156
166
  end # def init_discover
157
167
 
158
168
  public
@@ -0,0 +1,78 @@
1
+ require "grok-pure"
2
+ require "logger"
3
+
4
# Suggests grok patterns for a piece of text: substrings that match one of the
# parent Grok's named patterns are replaced with a "%{NAME}" reference.
class Grok::Discovery
  # Patterns whose expanded expression scores below this value are never
  # suggested (SPACE, NOTSPACE, DATA, etc. are too generic to be useful).
  MINIMUM_COMPLEXITY = 20

  # The logger
  attr_accessor :logger

  # grok - the Grok instance whose pattern dictionary (Grok#patterns) is used
  #        as the set of candidate patterns.
  #
  # A default logger writing warnings to STDOUT is installed; callers may
  # replace it through #logger=.
  def initialize(grok)
    @grok = grok
    @logger = Cabin::Channel.new
    @logger.subscribe(Logger.new(STDOUT))
    @logger.level = :warn
  end # def initialize

  # Rewrite 'text' so that recognizable substrings become "%{NAME}" references.
  #
  # text - the string to analyze. It is cloned first; the caller's string is
  #        not modified.
  #
  # Returns the rewritten copy of the text.
  def discover(text)
    text = text.clone

    # Compile every known pattern in a Grok of its own so that each one can
    # be matched (and scored) independently.
    groks = {}
    @grok.patterns.each do |name, _expression|
      grok = Grok.new
      # Copy in the same grok patterns from the parent
      grok.patterns.merge!(@grok.patterns)
      grok.compile("%{#{name}}")
      groks[name] = grok
    end

    # Most complex patterns are tried first. The complexity filter depends
    # only on the compiled pattern, so apply it once here instead of on every
    # pass over the text.
    candidates = groks.sort { |a, b| compare(a, b) }.reject do |_name, grok|
      complexity(grok.expanded_pattern) < MINIMUM_COMPLEXITY
    end

    # Every successful replacement restarts the scan from the most complex
    # pattern; stop once a full pass makes no replacement.
    replaced = true
    while replaced
      replaced = false
      candidates.each do |name, grok|
        m = grok.match(text)
        # Skip non-matches
        next unless m
        # NOTE(review): m.start/m.end are used as the character offsets of
        # the match within text - confirm against Grok::Match.
        part = text[m.start...m.end]
        # Only include things that have word boundaries (not just words)
        next if part !~ /.\b./
        # Skip over parts that already contain a %{pattern} reference. The
        # quantifier must sit outside the character class so that names of
        # any length are recognized.
        next if part =~ /%\{[^}]+\}/

        text[m.start...m.end] = "%{#{name}}"
        replaced = true
        break
      end
    end

    text
  end # def discover

  private

  # Sort comparator for [ name, grok ] pairs.
  #
  # a and b are each: [ name, grok ]
  #
  # Orders the pair with the highest complexity first.
  def compare(a, b)
    complexity(b.last.expanded_pattern) <=> complexity(a.last.expanded_pattern)
  end # def compare

  # Score an expanded pattern expression.
  #
  # expression - the regular expression source, as a string.
  #
  # Returns the number of '|' characters (branches) plus the length of the
  # expression.
  def complexity(expression)
    expression.count("|") + expression.length
  end # def complexity
end # class Grok::Discovery
71
+
72
+ #/* Compute the relative complexity of a pattern */
73
+ #static int complexity(const grok_t *grok) {
74
+ #int score;
75
+ #score += string_count(grok->full_pattern, "|");
76
+ #score += strlen(grok->full_pattern) / 2;
77
+ #return -score; /* Sort most-complex first */
78
+ #}
metadata CHANGED
@@ -1,90 +1,66 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: jls-grok
3
- version: !ruby/object:Gem::Version
4
- hash: 55
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.10.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 9
9
- - 6
10
- version: 0.9.6
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Jordan Sissel
14
9
  - Pete Fritchman
15
10
  autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
-
19
- date: 2012-01-17 00:00:00 Z
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2012-02-03 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: cabin
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &20862820 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
27
- - - "="
28
- - !ruby/object:Gem::Version
29
- hash: 21
30
- segments:
31
- - 0
32
- - 1
33
- - 7
19
+ requirements:
20
+ - - =
21
+ - !ruby/object:Gem::Version
34
22
  version: 0.1.7
35
23
  type: :runtime
36
- version_requirements: *id001
24
+ prerelease: false
25
+ version_requirements: *20862820
37
26
  description: Grok ruby bindings - pattern match/extraction tool
38
- email:
27
+ email:
39
28
  - jls@semicomplete.com
40
29
  - petef@databits.net
41
30
  executables: []
42
-
43
31
  extensions: []
44
-
45
32
  extra_rdoc_files: []
46
-
47
- files:
33
+ files:
48
34
  - lib/grok-pure.rb
49
35
  - lib/grok.rb
50
36
  - lib/grok/c-ext/pile.rb
51
37
  - lib/grok/c-ext/match.rb
38
+ - lib/grok/pure/discovery.rb
52
39
  - lib/grok/pure/pile.rb
53
40
  - lib/grok/pure/match.rb
54
- - lib/Grok.rb
55
41
  homepage: http://code.google.com/p/semicomplete/wiki/Grok
56
42
  licenses: []
57
-
58
43
  post_install_message:
59
44
  rdoc_options: []
60
-
61
- require_paths:
45
+ require_paths:
62
46
  - lib
63
47
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 3
70
- segments:
71
- - 0
72
- version: "0"
73
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
55
  none: false
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- hash: 3
79
- segments:
80
- - 0
81
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
82
60
  requirements: []
83
-
84
61
  rubyforge_project:
85
- rubygems_version: 1.8.13
62
+ rubygems_version: 1.8.10
86
63
  signing_key:
87
64
  specification_version: 3
88
65
  summary: grok bindings for ruby
89
66
  test_files: []
90
-
@@ -1,3 +0,0 @@
1
- require "grok.rb"
2
-
3
- # compat for when grok was Grok.so