jls-grok 0.9.6 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/grok-pure.rb +14 -4
- data/lib/grok/pure/discovery.rb +78 -0
- metadata +28 -52
- data/lib/Grok.rb +0 -3
data/lib/grok-pure.rb
CHANGED
@@ -4,9 +4,17 @@ require "cabin"
|
|
4
4
|
|
5
5
|
# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
|
6
6
|
class Grok
|
7
|
+
# The pattern input
|
7
8
|
attr_accessor :pattern
|
9
|
+
|
10
|
+
# The fully-expanded pattern (in regex form)
|
8
11
|
attr_accessor :expanded_pattern
|
12
|
+
|
13
|
+
# The logger
|
9
14
|
attr_accessor :logger
|
15
|
+
|
16
|
+
# The dictionary of pattern names to pattern expressions
|
17
|
+
attr_accessor :patterns
|
10
18
|
|
11
19
|
PATTERN_RE = \
|
12
20
|
/%\{ # match '%{' not prefixed with '\'
|
@@ -58,6 +66,7 @@ class Grok
|
|
58
66
|
next if line =~ /^\s*#/
|
59
67
|
# File format is: NAME ' '+ PATTERN '\n'
|
60
68
|
name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
|
69
|
+
#p name => pattern
|
61
70
|
# If the line is malformed, skip it.
|
62
71
|
next if pattern.nil?
|
63
72
|
# Trim newline and add the pattern.
|
@@ -70,9 +79,9 @@ class Grok
|
|
70
79
|
def compile(pattern)
|
71
80
|
@capture_map = {}
|
72
81
|
|
73
|
-
iterations_left =
|
82
|
+
iterations_left = 10000
|
74
83
|
@pattern = pattern
|
75
|
-
@expanded_pattern = pattern
|
84
|
+
@expanded_pattern = pattern.clone
|
76
85
|
index = 0
|
77
86
|
|
78
87
|
# Replace any instances of '%{FOO}' with that pattern.
|
@@ -151,8 +160,9 @@ class Grok
|
|
151
160
|
|
152
161
|
private
|
153
162
|
def init_discover
|
154
|
-
|
155
|
-
@discover
|
163
|
+
require "grok/pure/discovery"
|
164
|
+
@discover = Grok::Discovery.new(self)
|
165
|
+
@discover.logger = @logger
|
156
166
|
end # def init_discover
|
157
167
|
|
158
168
|
public
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "grok-pure"
|
2
|
+
require "logger"
|
3
|
+
|
4
|
+
# Rewrites a piece of text into a grok expression by finding spans that match
# known grok patterns and replacing each span with a "%{NAME}" reference.
class Grok::Discovery
  # Patterns whose expanded regex scores below this are skipped because they
  # lack complexity (SPACE, NOTSPACE, DATA, etc).
  MINIMUM_COMPLEXITY = 20

  # Matches a "%{NAME}" reference that has already been substituted.
  PATTERN_REFERENCE_RE = /%\{[^}]+\}/

  # The logger
  attr_accessor :logger

  # Create a discovery helper.
  #
  # grok - the parent Grok whose #patterns dictionary supplies the candidate
  #        patterns.
  #
  # A default logger printing to STDOUT at :warn level is installed; it can be
  # replaced through #logger=.
  def initialize(grok)
    @grok = grok
    @logger = Cabin::Channel.new
    @logger.subscribe(Logger.new(STDOUT))
    @logger.level = :warn
  end # def initialize

  # Replace recognisable parts of 'text' with "%{NAME}" pattern references.
  #
  # Candidates are tried most-complex first. After every substitution the
  # search restarts from the most complex candidate, and it stops once a full
  # pass makes no substitution.
  #
  # text - the String to analyse. It is not modified; a copy is edited.
  #
  # Returns the rewritten String.
  def discover(text)
    # dup (not clone) so that frozen input still yields a mutable copy.
    text = text.dup
    candidates = discovery_candidates

    loop do
      break unless substitute_first_match(text, candidates)
    end

    return text
  end # def discover

  private

  # Compile every pattern of the parent grok in a Grok of its own.
  #
  # Returns an Array of [name, grok] pairs sorted most-complex first, with
  # patterns below MINIMUM_COMPLEXITY removed.
  def discovery_candidates
    groks = {}
    @grok.patterns.each do |name, _expression|
      grok = Grok.new
      # Copy in the same grok patterns from the parent
      grok.patterns.merge!(@grok.patterns)
      grok.compile("%{#{name}}")
      groks[name] = grok
    end

    sorted = groks.sort { |a, b| compare(a, b) }
    # The complexity of a compiled pattern never changes, so filter once here
    # rather than on every pass over the text.
    return sorted.reject do |_name, grok|
      complexity(grok.expanded_pattern) < MINIMUM_COMPLEXITY
    end
  end # def discovery_candidates

  # Substitute, in place, the first acceptable candidate match in 'text'.
  #
  # text       - the mutable String being rewritten.
  # candidates - Array of [name, grok] pairs in priority order.
  #
  # Returns true when a substitution was made, false otherwise.
  def substitute_first_match(text, candidates)
    candidates.each do |name, grok|
      m = grok.match(text)
      # Skip non-matches
      next unless m
      # NOTE(review): assumes m.start / m.end are offsets into 'text', as the
      # slicing below relies on -- confirm against Grok::Match.
      part = text[m.start ... m.end]
      # Only include things that have word boundaries (not just words)
      next if part !~ /.\b./
      # Skip over parts that appear to include %{pattern} already
      next if part =~ PATTERN_REFERENCE_RE

      text[m.start ... m.end] = "%{#{name}}"
      return true
    end
    return false
  end # def substitute_first_match

  # Sort comparator: a and b are each [ name, grok ]; highest complexity
  # sorts first.
  def compare(a, b)
    return complexity(b.last.expanded_pattern) <=> complexity(a.last.expanded_pattern)
  end # def compare

  # Score an expanded pattern: the number of branches ("|") in the pattern
  # plus the length of the pattern.
  def complexity(expression)
    return expression.count("|") + expression.length
  end # def complexity
end # class Grok::Discovery
|
71
|
+
|
72
|
+
#/* Compute the relative complexity of a pattern */
|
73
|
+
#static int complexity(const grok_t *grok) {
|
74
|
+
#int score;
|
75
|
+
#score += string_count(grok->full_pattern, "|");
|
76
|
+
#score += strlen(grok->full_pattern) / 2;
|
77
|
+
#return -score; /* Sort most-complex first */
|
78
|
+
#}
|
metadata
CHANGED
@@ -1,90 +1,66 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: jls-grok
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 9
|
9
|
-
- 6
|
10
|
-
version: 0.9.6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Jordan Sissel
|
14
9
|
- Pete Fritchman
|
15
10
|
autorequire:
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2012-02-03 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: cabin
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &20862820 !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 21
|
30
|
-
segments:
|
31
|
-
- 0
|
32
|
-
- 1
|
33
|
-
- 7
|
19
|
+
requirements:
|
20
|
+
- - =
|
21
|
+
- !ruby/object:Gem::Version
|
34
22
|
version: 0.1.7
|
35
23
|
type: :runtime
|
36
|
-
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *20862820
|
37
26
|
description: Grok ruby bindings - pattern match/extraction tool
|
38
|
-
email:
|
27
|
+
email:
|
39
28
|
- jls@semicomplete.com
|
40
29
|
- petef@databits.net
|
41
30
|
executables: []
|
42
|
-
|
43
31
|
extensions: []
|
44
|
-
|
45
32
|
extra_rdoc_files: []
|
46
|
-
|
47
|
-
files:
|
33
|
+
files:
|
48
34
|
- lib/grok-pure.rb
|
49
35
|
- lib/grok.rb
|
50
36
|
- lib/grok/c-ext/pile.rb
|
51
37
|
- lib/grok/c-ext/match.rb
|
38
|
+
- lib/grok/pure/discovery.rb
|
52
39
|
- lib/grok/pure/pile.rb
|
53
40
|
- lib/grok/pure/match.rb
|
54
|
-
- lib/Grok.rb
|
55
41
|
homepage: http://code.google.com/p/semicomplete/wiki/Grok
|
56
42
|
licenses: []
|
57
|
-
|
58
43
|
post_install_message:
|
59
44
|
rdoc_options: []
|
60
|
-
|
61
|
-
require_paths:
|
45
|
+
require_paths:
|
62
46
|
- lib
|
63
47
|
- lib
|
64
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
49
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
70
|
-
|
71
|
-
- 0
|
72
|
-
version: "0"
|
73
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
55
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
segments:
|
80
|
-
- 0
|
81
|
-
version: "0"
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
82
60
|
requirements: []
|
83
|
-
|
84
61
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.8.
|
62
|
+
rubygems_version: 1.8.10
|
86
63
|
signing_key:
|
87
64
|
specification_version: 3
|
88
65
|
summary: grok bindings for ruby
|
89
66
|
test_files: []
|
90
|
-
|
data/lib/Grok.rb
DELETED