jls-grok 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/grok-pure.rb +14 -4
- data/lib/grok/pure/discovery.rb +78 -0
- metadata +28 -52
- data/lib/Grok.rb +0 -3
data/lib/grok-pure.rb
CHANGED
@@ -4,9 +4,17 @@ require "cabin"
|
|
4
4
|
|
5
5
|
# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
|
6
6
|
class Grok
|
7
|
+
# The pattern input
|
7
8
|
attr_accessor :pattern
|
9
|
+
|
10
|
+
# The fully-expanded pattern (in regex form)
|
8
11
|
attr_accessor :expanded_pattern
|
12
|
+
|
13
|
+
# The logger
|
9
14
|
attr_accessor :logger
|
15
|
+
|
16
|
+
# The dictionary of pattern names to pattern expressions
|
17
|
+
attr_accessor :patterns
|
10
18
|
|
11
19
|
PATTERN_RE = \
|
12
20
|
/%\{ # match '%{' not prefixed with '\'
|
@@ -58,6 +66,7 @@ class Grok
|
|
58
66
|
next if line =~ /^\s*#/
|
59
67
|
# File format is: NAME ' '+ PATTERN '\n'
|
60
68
|
name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
|
69
|
+
#p name => pattern
|
61
70
|
# If the line is malformed, skip it.
|
62
71
|
next if pattern.nil?
|
63
72
|
# Trim newline and add the pattern.
|
@@ -70,9 +79,9 @@ class Grok
|
|
70
79
|
def compile(pattern)
|
71
80
|
@capture_map = {}
|
72
81
|
|
73
|
-
iterations_left =
|
82
|
+
iterations_left = 10000
|
74
83
|
@pattern = pattern
|
75
|
-
@expanded_pattern = pattern
|
84
|
+
@expanded_pattern = pattern.clone
|
76
85
|
index = 0
|
77
86
|
|
78
87
|
# Replace any instances of '%{FOO}' with that pattern.
|
@@ -151,8 +160,9 @@ class Grok
|
|
151
160
|
|
152
161
|
private
|
153
162
|
def init_discover
|
154
|
-
|
155
|
-
@discover
|
163
|
+
require "grok/pure/discovery"
|
164
|
+
@discover = Grok::Discovery.new(self)
|
165
|
+
@discover.logger = @logger
|
156
166
|
end # def init_discover
|
157
167
|
|
158
168
|
public
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "grok-pure"
|
2
|
+
require "logger"
|
3
|
+
|
4
|
+
class Grok::Discovery
|
5
|
+
attr_accessor :logger
|
6
|
+
|
7
|
+
def initialize(grok)
|
8
|
+
@grok = grok
|
9
|
+
@logger = Cabin::Channel.new
|
10
|
+
@logger.subscribe(Logger.new(STDOUT))
|
11
|
+
@logger.level = :warn
|
12
|
+
end # def initialize
|
13
|
+
|
14
|
+
def discover(text)
|
15
|
+
text = text.clone
|
16
|
+
# TODO(sissel): Sort patterns by complexity, most complex first.
|
17
|
+
# - For each pattern, compile it in a grok by itself.
|
18
|
+
# - Make a dictionary of { "name" => Grok } for each pattern
|
19
|
+
# - Sort groks by complexity of the Grok#expanded_pattern
|
20
|
+
groks = {}
|
21
|
+
@grok.patterns.each do |name, expression|
|
22
|
+
grok = Grok.new
|
23
|
+
# Copy in the same grok patterns from the parent
|
24
|
+
grok.patterns.merge!(@grok.patterns)
|
25
|
+
grok.compile("%{#{name}}")
|
26
|
+
groks[name] = grok
|
27
|
+
end
|
28
|
+
|
29
|
+
patterns = groks.sort { |a, b| compare(a, b) }
|
30
|
+
|
31
|
+
done = false
|
32
|
+
while !done
|
33
|
+
done = true # will reset this if we are not done later.
|
34
|
+
patterns.each do |name, grok|
|
35
|
+
# Skip patterns that lack complexity (SPACE, NOTSPACE, DATA, etc)
|
36
|
+
next if complexity(grok.expanded_pattern) < 20
|
37
|
+
m = grok.match(text)
|
38
|
+
# Skip non-matches
|
39
|
+
next unless m
|
40
|
+
part = text[m.start ... m.end]
|
41
|
+
# Only include things that have word boundaries (not just words)
|
42
|
+
next if part !~ /.\b./
|
43
|
+
# Skip over parts that appear to include %{pattern} already
|
44
|
+
next if part =~ /%{[^}+]}/
|
45
|
+
acting = true
|
46
|
+
text[m.start ... m.end] = "%{#{name}}"
|
47
|
+
|
48
|
+
# Start the loop over again
|
49
|
+
done = false
|
50
|
+
break
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
return text
|
55
|
+
end # def discover
|
56
|
+
|
57
|
+
private
|
58
|
+
def compare(a, b)
|
59
|
+
# a and b are each: [ name, grok ]
|
60
|
+
# sort highest complexity first
|
61
|
+
return complexity(b.last.expanded_pattern) <=> complexity(a.last.expanded_pattern)
|
62
|
+
end # def compare
|
63
|
+
|
64
|
+
private
|
65
|
+
def complexity(expression)
|
66
|
+
score = expression.count("|") # number of branches in the pattern
|
67
|
+
score += expression.length # the length of the pattern
|
68
|
+
end # def complexity
|
69
|
+
|
70
|
+
end # class Grok::Discovery
|
71
|
+
|
72
|
+
#/* Compute the relative complexity of a pattern */
|
73
|
+
#static int complexity(const grok_t *grok) {
|
74
|
+
#int score;
|
75
|
+
#score += string_count(grok->full_pattern, "|");
|
76
|
+
#score += strlen(grok->full_pattern) / 2;
|
77
|
+
#return -score; /* Sort most-complex first */
|
78
|
+
#}
|
metadata
CHANGED
@@ -1,90 +1,66 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: jls-grok
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 9
|
9
|
-
- 6
|
10
|
-
version: 0.9.6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Jordan Sissel
|
14
9
|
- Pete Fritchman
|
15
10
|
autorequire:
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2012-02-03 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: cabin
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &20862820 !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 21
|
30
|
-
segments:
|
31
|
-
- 0
|
32
|
-
- 1
|
33
|
-
- 7
|
19
|
+
requirements:
|
20
|
+
- - =
|
21
|
+
- !ruby/object:Gem::Version
|
34
22
|
version: 0.1.7
|
35
23
|
type: :runtime
|
36
|
-
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *20862820
|
37
26
|
description: Grok ruby bindings - pattern match/extraction tool
|
38
|
-
email:
|
27
|
+
email:
|
39
28
|
- jls@semicomplete.com
|
40
29
|
- petef@databits.net
|
41
30
|
executables: []
|
42
|
-
|
43
31
|
extensions: []
|
44
|
-
|
45
32
|
extra_rdoc_files: []
|
46
|
-
|
47
|
-
files:
|
33
|
+
files:
|
48
34
|
- lib/grok-pure.rb
|
49
35
|
- lib/grok.rb
|
50
36
|
- lib/grok/c-ext/pile.rb
|
51
37
|
- lib/grok/c-ext/match.rb
|
38
|
+
- lib/grok/pure/discovery.rb
|
52
39
|
- lib/grok/pure/pile.rb
|
53
40
|
- lib/grok/pure/match.rb
|
54
|
-
- lib/Grok.rb
|
55
41
|
homepage: http://code.google.com/p/semicomplete/wiki/Grok
|
56
42
|
licenses: []
|
57
|
-
|
58
43
|
post_install_message:
|
59
44
|
rdoc_options: []
|
60
|
-
|
61
|
-
require_paths:
|
45
|
+
require_paths:
|
62
46
|
- lib
|
63
47
|
- lib
|
64
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
49
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
70
|
-
|
71
|
-
- 0
|
72
|
-
version: "0"
|
73
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
55
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
segments:
|
80
|
-
- 0
|
81
|
-
version: "0"
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
82
60
|
requirements: []
|
83
|
-
|
84
61
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.8.
|
62
|
+
rubygems_version: 1.8.10
|
86
63
|
signing_key:
|
87
64
|
specification_version: 3
|
88
65
|
summary: grok bindings for ruby
|
89
66
|
test_files: []
|
90
|
-
|
data/lib/Grok.rb
DELETED