logstash-filter-grok 0.1.0 → 0.1.2

checksums.yaml CHANGED
@@ -1,15 +1,7 @@
  ---
- !binary "U0hBMQ==":
-   metadata.gz: !binary |-
-     ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==
-   data.tar.gz: !binary |-
-     YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+ SHA1:
+   metadata.gz: 27006dbd92d0134cef4e01124e2ed8300134aeac
+   data.tar.gz: d571e84744111378dcae55c24293e6fdcd4590d0
  SHA512:
-   metadata.gz: !binary |-
-     ZDJlZTRlYzBmYmFjYmRhNzA1OGE5ZTljN2ZkZGNkNzQ0ZTlhNDI0MzNmMTcx
-     NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
-     NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
-   data.tar.gz: !binary |-
-     ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
-     NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
-     YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
+   metadata.gz: 6c8f3172eddad9922c166ee17849475408e4f298f394d0a4672353b56ead744b5c80f4f8080f1c26bb2ed9242b9f7ef4309c66ca66f402d4c22c27f933daf655
+   data.tar.gz: d2cbe6637bb17b6707578c506cdc51ac64aa7720914fb01289b06cc95f456a8d7ed9cb204c88f03b544155a153b8063b85430cc18e0dadf7c70e30778acf250e
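
The change above swaps YAML `!binary`-wrapped, base64-encoded digests for plain hex digests of the two archives inside the packaged `.gem`. A minimal sketch of reproducing the new values with Ruby's standard library (it assumes the gem has been unpacked so `metadata.gz` and `data.tar.gz` sit in the current directory):

    require "digest"

    # Print SHA1 and SHA512 digests in the same plain-hex form used above.
    %w[metadata.gz data.tar.gz].each do |name|
      puts "SHA1   #{name}: #{Digest::SHA1.file(name).hexdigest}"
      puts "SHA512 #{name}: #{Digest::SHA512.file(name).hexdigest}"
    end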
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
- source 'http://rubygems.org'
- gem 'rake'
- gem 'gem_publisher'
+ source 'https://rubygems.org'
+ gemspec
+ gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5"
data/Rakefile CHANGED
@@ -1,6 +1,2 @@
- @files=[]
-
- task :default do
-   system("rake -T")
- end
-
+ require "logstash/devutils/rake"
+ require "logstash/devutils/rake"
data/lib/logstash/filters/grok.rb CHANGED
@@ -1,363 +1,359 @@
- # encoding: utf-8
- require "logstash/filters/base"
- require "logstash/namespace"
- require "logstash/environment"
- require "logstash/patterns/core"
- require "set"
-
- # Parse arbitrary text and structure it.
- #
- # Grok is currently the best way in logstash to parse crappy unstructured log
- # data into something structured and queryable.
- #
- # This tool is perfect for syslog logs, apache and other webserver logs, mysql
- # logs, and in general, any log format that is generally written for humans
- # and not computer consumption.
- #
- # Logstash ships with about 120 patterns by default. You can find them here:
- # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
- # your own trivially. (See the patterns_dir setting)
- #
- # If you need help building patterns to match your logs, you will find the
- # <http://grokdebug.herokuapp.com> tool quite useful!
- #
- # #### Grok Basics
- #
- # Grok works by combining text patterns into something that matches your
- # logs.
- #
- # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
- #
- # The `SYNTAX` is the name of the pattern that will match your text. For
- # example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
- # be matched by the IP pattern. The syntax is how you match.
- #
- # The `SEMANTIC` is the identifier you give to the piece of text being matched.
- # For example, "3.44" could be the duration of an event, so you could call it
- # simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
- # making a request.
- #
- # For the above example, your grok filter would look something like this:
- #
- #     %{NUMBER:duration} %{IP:client}
- #
- # Optionally you can add a data type conversion to your grok pattern. By default
- # all semantics are saved as strings. If you wish to convert a semantic's data type,
- # for example change a string to an integer then suffix it with the target data type.
- # For example `%{NUMBER:num:int}` which converts the 'num' semantic from a string to an
- # integer. Currently the only supported conversions are `int` and `float`.
- #
- # #### Example
- #
- # With that idea of a syntax and semantic, we can pull out useful fields from a
- # sample log like this fictional http request log:
- #
- #     55.3.244.1 GET /index.html 15824 0.043
- #
- # The pattern for this could be:
- #
- #     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
- #
- # For a more realistic example, let's read these logs from a file:
- #
- #     input {
- #       file {
- #         path => "/var/log/http.log"
- #       }
- #     }
- #     filter {
- #       grok {
- #         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
- #       }
- #     }
- #
- # After the grok filter, the event will have a few extra fields in it:
- #
- # * client: 55.3.244.1
- # * method: GET
- # * request: /index.html
- # * bytes: 15824
- # * duration: 0.043
- #
- # #### Regular Expressions
- #
- # Grok sits on top of regular expressions, so any regular expressions are valid
- # in grok as well. The regular expression library is Oniguruma, and you can see
- # the full supported regexp syntax [on the Oniguruma
- # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
- #
- # #### Custom Patterns
- #
- # Sometimes logstash doesn't have a pattern you need. For this, you have
- # a few options.
- #
- # First, you can use the Oniguruma syntax for 'named capture' which will
- # let you match a piece of text and save it as a field:
- #
- #     (?<field_name>the pattern here)
- #
- # For example, postfix logs have a 'queue id' that is a 10 or 11-character
- # hexadecimal value. I can capture that easily like this:
- #
- #     (?<queue_id>[0-9A-F]{10,11})
- #
- # Alternately, you can create a custom patterns file.
- #
- # * Create a directory called `patterns` with a file in it called `extra`
- #   (the file name doesn't matter, but name it meaningfully for yourself)
- # * In that file, write the pattern you need as the pattern name, a space, then
- #   the regexp for that pattern.
- #
- # For example, doing the postfix queue id example as above:
- #
- #     # contents of ./patterns/postfix:
- #     POSTFIX_QUEUEID [0-9A-F]{10,11}
- #
- # Then use the `patterns_dir` setting in this plugin to tell logstash where
- # your custom patterns directory is. Here's a full example with a sample log:
- #
- #     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
- #
- #     filter {
- #       grok {
- #         patterns_dir => "./patterns"
- #         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
- #       }
- #     }
- #
- # The above will match and result in the following fields:
- #
- # * timestamp: Jan 1 06:25:43
- # * logsource: mailserver14
- # * program: postfix/cleanup
- # * pid: 21403
- # * queue_id: BEF25A72965
- # * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
- #
- # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
- # SYSLOGBASE pattern which itself is defined by other patterns.
- class LogStash::Filters::Grok < LogStash::Filters::Base
-   config_name "grok"
-   milestone 3
-
-   # Specify a pattern to parse with. This will match the 'message' field.
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/patterns/core"
+ require "set"
+
+ # Parse arbitrary text and structure it.
  #
-   # If you want to match fields other than message, use the 'match' setting.
-   # Multiple patterns are fine.
-   config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
-
-   # A hash of matches of field => value
+ # Grok is currently the best way in logstash to parse crappy unstructured log
+ # data into something structured and queryable.
  #
-   # For example:
+ # This tool is perfect for syslog logs, apache and other webserver logs, mysql
+ # logs, and in general, any log format that is generally written for humans
+ # and not computer consumption.
  #
-   #     filter {
-   #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
-   #     }
+ # Logstash ships with about 120 patterns by default. You can find them here:
+ # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+ # your own trivially. (See the `patterns_dir` setting)
+ #
+ # If you need help building patterns to match your logs, you will find the
+ # <http://grokdebug.herokuapp.com> tool quite useful!
+ #
+ # ==== Grok Basics
+ #
+ # Grok works by combining text patterns into something that matches your
+ # logs.
+ #
+ # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
+ #
+ # The `SYNTAX` is the name of the pattern that will match your text. For
+ # example, `3.44` will be matched by the `NUMBER` pattern and `55.3.244.1` will
+ # be matched by the `IP` pattern. The syntax is how you match.
  #
-   # Alternatively, using the old array syntax:
+ # The `SEMANTIC` is the identifier you give to the piece of text being matched.
+ # For example, `3.44` could be the duration of an event, so you could call it
+ # simply `duration`. Further, a string `55.3.244.1` might identify the `client`
+ # making a request.
  #
+ # For the above example, your grok filter would look something like this:
+ # [source,ruby]
+ #     %{NUMBER:duration} %{IP:client}
+ #
+ # Optionally you can add a data type conversion to your grok pattern. By default
+ # all semantics are saved as strings. If you wish to convert a semantic's data type,
+ # for example change a string to an integer then suffix it with the target data type.
+ # For example `%{NUMBER:num:int}` which converts the `num` semantic from a string to an
+ # integer. Currently the only supported conversions are `int` and `float`.
+ #
+ # .Examples:
+ #
+ # With that idea of a syntax and semantic, we can pull out useful fields from a
+ # sample log like this fictional http request log:
+ # [source,ruby]
+ #     55.3.244.1 GET /index.html 15824 0.043
+ #
+ # The pattern for this could be:
+ # [source,ruby]
+ #     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+ #
+ # For a more realistic example, let's read these logs from a file:
+ # [source,ruby]
+ #     input {
+ #       file {
+ #         path => "/var/log/http.log"
+ #       }
+ #     }
  #     filter {
-   #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+ #       grok {
+ #         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+ #       }
  #     }
  #
-   config :match, :validate => :hash, :default => {}
-
+ # After the grok filter, the event will have a few extra fields in it:
+ #
+ # * `client: 55.3.244.1`
+ # * `method: GET`
+ # * `request: /index.html`
+ # * `bytes: 15824`
+ # * `duration: 0.043`
  #
-   # logstash ships by default with a bunch of patterns, so you don't
-   # necessarily need to define this yourself unless you are adding additional
-   # patterns.
+ # ==== Regular Expressions
  #
-   # Pattern files are plain text with format:
+ # Grok sits on top of regular expressions, so any regular expressions are valid
+ # in grok as well. The regular expression library is Oniguruma, and you can see
+ # the full supported regexp syntax [on the Oniguruma
+ # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
  #
-   #     NAME PATTERN
+ # ==== Custom Patterns
  #
-   # For example:
+ # Sometimes logstash doesn't have a pattern you need. For this, you have
+ # a few options.
  #
-   #     NUMBER \d+
-   config :patterns_dir, :validate => :array, :default => []
-
-   # Drop if matched. Note, this feature may not stay. It is preferable to combine
-   # grok + grep filters to do parsing + dropping.
-   config :drop_if_match, :validate => :boolean, :default => false
-
-   # Break on first match. The first successful match by grok will result in the
-   # filter being finished. If you want grok to try all patterns (maybe you are
-   # parsing different things), then set this to false.
-   config :break_on_match, :validate => :boolean, :default => true
-
-   # If true, only store named captures from grok.
-   config :named_captures_only, :validate => :boolean, :default => true
-
-   # If true, keep empty captures as event fields.
-   config :keep_empty_captures, :validate => :boolean, :default => false
-
-   # If true, make single-value fields simply that value, not an array
-   # containing that one value.
-   config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
-
-   # Append values to the 'tags' field when there has been no
-   # successful match
-   config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
-
-   # The fields to overwrite.
+ # First, you can use the Oniguruma syntax for named capture which will
+ # let you match a piece of text and save it as a field:
+ # [source,ruby]
+ #     (?<field_name>the pattern here)
  #
-   # This allows you to overwrite a value in a field that already exists.
+ # For example, postfix logs have a `queue id` that is a 10 or 11-character
+ # hexadecimal value. I can capture that easily like this:
+ # [source,ruby]
+ #     (?<queue_id>[0-9A-F]{10,11})
  #
-   # For example, if you have a syslog line in the 'message' field, you can
-   # overwrite the 'message' field with part of the match like so:
+ # Alternately, you can create a custom patterns file.
  #
+ # * Create a directory called `patterns` with a file in it called `extra`
+ #   (the file name doesn't matter, but name it meaningfully for yourself)
+ # * In that file, write the pattern you need as the pattern name, a space, then
+ #   the regexp for that pattern.
+ #
+ # For example, doing the postfix queue id example as above:
+ # [source,ruby]
+ #     # contents of ./patterns/postfix:
+ #     POSTFIX_QUEUEID [0-9A-F]{10,11}
+ #
+ # Then use the `patterns_dir` setting in this plugin to tell logstash where
+ # your custom patterns directory is. Here's a full example with a sample log:
+ # [source,ruby]
+ #     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ # [source,ruby]
  #     filter {
  #       grok {
-   #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
-   #         overwrite => [ "message" ]
+ #         patterns_dir => "./patterns"
+ #         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
  #       }
  #     }
  #
-   # In this case, a line like "May 29 16:37:11 sadness logger: hello world"
-   # will be parsed and 'hello world' will overwrite the original message.
-   config :overwrite, :validate => :array, :default => []
-
-   # Detect if we are running from a jarfile, pick the right path.
-   @@patterns_path ||= Set.new
-   #@@patterns_path += [LogStash::Environment.pattern_path("*")]
-   @@patterns_path += [LogStash::Patterns::Core.path]
-
-   public
-   def initialize(params)
-     super(params)
-     @match["message"] ||= []
-     @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
-     # a cache of capture name handler methods.
-     @handlers = {}
-   end
-
-   public
-   def register
-     require "grok-pure" # rubygem 'jls-grok'
-
-     @patternfiles = []
-
-     # Have @@patterns_path show first. Last-in pattern definitions win; this
-     # will let folks redefine built-in patterns at runtime.
-     @patterns_dir = @@patterns_path.to_a + @patterns_dir
-     @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
-     @patterns_dir.each do |path|
-       if File.directory?(path)
-         path = File.join(path, "*")
-       end
-
-       Dir.glob(path).each do |file|
-         @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
-         @patternfiles << file
-       end
+ # The above will match and result in the following fields:
+ #
+ # * `timestamp: Jan 1 06:25:43`
+ # * `logsource: mailserver14`
+ # * `program: postfix/cleanup`
+ # * `pid: 21403`
+ # * `queue_id: BEF25A72965`
+ # * `syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>`
+ #
+ # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+ # `SYSLOGBASE` pattern which itself is defined by other patterns.
+ class LogStash::Filters::Grok < LogStash::Filters::Base
+   config_name "grok"
+   milestone 3
+
+   # Specify a pattern to parse with. This will match the `message` field.
+   #
+   # If you want to match fields other than `message`, use the `match` setting.
+   # Multiple patterns are fine.
+   config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+   # A hash of matches of field => value
+   #
+   # For example:
+   # [source,ruby]
+   #     filter {
+   #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+   #     }
+   #
+   # Alternatively, using the old array syntax:
+   # [source,ruby]
+   #     filter {
+   #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+   #     }
+   #
+   config :match, :validate => :hash, :default => {}
+
+   #
+   # logstash ships by default with a bunch of patterns, so you don't
+   # necessarily need to define this yourself unless you are adding additional
+   # patterns.
+   #
+   # Pattern files are plain text with format:
+   # [source,ruby]
+   #     NAME PATTERN
+   #
+   # For example:
+   # [source,ruby]
+   #     NUMBER \d+
+   config :patterns_dir, :validate => :array, :default => []
+
+   # Break on first match. The first successful match by grok will result in the
+   # filter being finished. If you want grok to try all patterns (maybe you are
+   # parsing different things), then set this to false.
+   config :break_on_match, :validate => :boolean, :default => true
+
+   # If `true`, only store named captures from grok.
+   config :named_captures_only, :validate => :boolean, :default => true
+
+   # If `true`, keep empty captures as event fields.
+   config :keep_empty_captures, :validate => :boolean, :default => false
+
+   # If `true`, make single-value fields simply that value, not an array
+   # containing that one value.
+   config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+   # Append values to the `tags` field when there has been no
+   # successful match
+   config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+   # The fields to overwrite.
+   #
+   # This allows you to overwrite a value in a field that already exists.
+   #
+   # For example, if you have a syslog line in the `message` field, you can
+   # overwrite the `message` field with part of the match like so:
+   # [source,ruby]
+   #     filter {
+   #       grok {
+   #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+   #         overwrite => [ "message" ]
+   #       }
+   #     }
+   #
+   # In this case, a line like `May 29 16:37:11 sadness logger: hello world`
+   # will be parsed and `hello world` will overwrite the original message.
+   config :overwrite, :validate => :array, :default => []
+
+   # Detect if we are running from a jarfile, pick the right path.
+   @@patterns_path ||= Set.new
+   #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+   @@patterns_path += [LogStash::Patterns::Core.path]
+
+   public
+   def initialize(params)
+     super(params)
+     @match["message"] ||= []
+     @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+     # a cache of capture name handler methods.
+     @handlers = {}
    end
-
-     @patterns = Hash.new { |h,k| h[k] = [] }
-
-     @logger.info? and @logger.info("Match data", :match => @match)
-
-     @match.each do |field, patterns|
-       patterns = [patterns] if patterns.is_a?(String)
-
-       @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
-       patterns.each do |pattern|
-         @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
-         grok = Grok.new
-         grok.logger = @logger unless @logger.nil?
-         add_patterns_from_files(@patternfiles, grok)
-         grok.compile(pattern, @named_captures_only)
-         @patterns[field] << grok
+
+   public
+   def register
+     require "grok-pure" # rubygem 'jls-grok'
+
+     @patternfiles = []
+
+     # Have @@patterns_path show first. Last-in pattern definitions win; this
+     # will let folks redefine built-in patterns at runtime.
+     @patterns_dir = @@patterns_path.to_a + @patterns_dir
+     @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+     @patterns_dir.each do |path|
+       if File.directory?(path)
+         path = File.join(path, "*")
+       end
+
+       Dir.glob(path).each do |file|
+         @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+         @patternfiles << file
+       end
      end
-     end # @match.each
-   end # def register
-
-   public
-   def filter(event)
-     return unless filter?(event)
-
-     matched = false
-     done = false
-
-     @logger.debug? and @logger.debug("Running grok filter", :event => event);
-     @patterns.each do |field, groks|
-       if match(groks, field, event)
-         matched = true
-         break if @break_on_match
+
+     @patterns = Hash.new { |h,k| h[k] = [] }
+
+     @logger.info? and @logger.info("Match data", :match => @match)
+
+     @match.each do |field, patterns|
+       patterns = [patterns] if patterns.is_a?(String)
+
+       @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+       patterns.each do |pattern|
+         @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+         grok = Grok.new
+         grok.logger = @logger unless @logger.nil?
+         add_patterns_from_files(@patternfiles, grok)
+         grok.compile(pattern, @named_captures_only)
+         @patterns[field] << grok
+       end
+     end # @match.each
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     matched = false
+     done = false
+
+     @logger.debug? and @logger.debug("Running grok filter", :event => event);
+     @patterns.each do |field, groks|
+       if match(groks, field, event)
+         matched = true
+         break if @break_on_match
+       end
+       #break if done
+     end # @patterns.each
+
+     if matched
+       filter_matched(event)
+     else
+       # Tag this event if we can't parse it. We can use this later to
+       # reparse+reindex logs if we improve the patterns given.
+       @tag_on_failure.each do |tag|
+         event["tags"] ||= []
+         event["tags"] << tag unless event["tags"].include?(tag)
+       end
      end
-       #break if done
-     end # @patterns.each
-
-     if matched
-       filter_matched(event)
-     else
-       # Tag this event if we can't parse it. We can use this later to
-       # reparse+reindex logs if we improve the patterns given.
-       @tag_on_failure.each do |tag|
-         event["tags"] ||= []
-         event["tags"] << tag unless event["tags"].include?(tag)
+
+     @logger.debug? and @logger.debug("Event now: ", :event => event)
+   end # def filter
+
+   private
+   def match(groks, field, event)
+     input = event[field]
+     if input.is_a?(Array)
+       success = false
+       input.each do |input|
+         success |= match_against_groks(groks, input, event)
+       end
+       return success
+     else
+       return match_against_groks(groks, input, event)
      end
+   rescue StandardError => e
+     @logger.warn("Grok regexp threw exception", :exception => e.message)
    end
-
-     @logger.debug? and @logger.debug("Event now: ", :event => event)
-   end # def filter
-
-   private
-   def match(groks, field, event)
-     input = event[field]
-     if input.is_a?(Array)
-       success = false
-       input.each do |input|
-         success |= match_against_groks(groks, input, event)
+
+   private
+   def match_against_groks(groks, input, event)
+     matched = false
+     groks.each do |grok|
+       # Convert anything else to string (number, hash, etc)
+       matched = grok.match_and_capture(input.to_s) do |field, value|
+         matched = true
+         handle(field, value, event)
+       end
+       break if matched and @break_on_match
      end
-       return success
-     else
-       return match_against_groks(groks, input, event)
+     return matched
    end
-   rescue StandardError => e
-     @logger.warn("Grok regexp threw exception", :exception => e.message)
-   end
-
-   private
-   def match_against_groks(groks, input, event)
-     matched = false
-     groks.each do |grok|
-       # Convert anything else to string (number, hash, etc)
-       matched = grok.match_and_capture(input.to_s) do |field, value|
-         matched = true
-         handle(field, value, event)
-       end
-       break if matched and @break_on_match
-     end
-     return matched
-   end
-
-   private
-   def handle(field, value, event)
-     return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
-
-     if @overwrite.include?(field)
-       event[field] = value
-     else
-       v = event[field]
-       if v.nil?
+
+   private
+   def handle(field, value, event)
+     return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+     if @overwrite.include?(field)
        event[field] = value
-       elsif v.is_a?(Array)
-         event[field] << value
-       elsif v.is_a?(String)
-         # Promote to array since we aren't overwriting.
-         event[field] = [v, value]
+     else
+       v = event[field]
+       if v.nil?
+         event[field] = value
+       elsif v.is_a?(Array)
+         event[field] << value
+       elsif v.is_a?(String)
+         # Promote to array since we aren't overwriting.
+         event[field] = [v, value]
+       end
      end
    end
-   end
-
-   private
-   def add_patterns_from_files(paths, grok)
-     paths.each do |path|
-       if !File.exists?(path)
-         raise "Grok pattern file does not exist: #{path}"
+
+   private
+   def add_patterns_from_files(paths, grok)
+     paths.each do |path|
+       if !File.exists?(path)
+         raise "Grok pattern file does not exist: #{path}"
+       end
+       grok.add_patterns_from_file(path)
      end
-       grok.add_patterns_from_file(path)
-     end
-   end # def add_patterns_from_files
-
- end # class LogStash::Filters::Grok
+   end # def add_patterns_from_files
+
+ end # class LogStash::Filters::Grok
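
For readers who want to experiment with patterns outside the filter, here is a minimal sketch that drives the underlying grok engine the same way `register` and `match_against_groks` above do. It assumes the `jls-grok` and `logstash-patterns-core` gems are installed; the pattern and sample line come from the documentation comments:

    require "grok-pure"               # rubygem 'jls-grok'
    require "logstash/patterns/core"  # bundled pattern files

    grok = Grok.new
    # Load every bundled pattern file, as register does above.
    Dir.glob(File.join(LogStash::Patterns::Core.path, "*")).each do |file|
      grok.add_patterns_from_file(file)
    end
    grok.compile("%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}", true)

    grok.match_and_capture("55.3.244.1 GET /index.html 15824 0.043") do |field, value|
      puts "#{field} => #{value}"  # e.g. client => 55.3.244.1
    end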
data/logstash-filter-grok.gemspec CHANGED
@@ -1,13 +1,13 @@
  Gem::Specification.new do |s|

    s.name = 'logstash-filter-grok'
-   s.version = '0.1.0'
+   s.version = '0.1.2'
    s.licenses = ['Apache License (2.0)']
    s.summary = "Parse arbitrary text and structure it."
-   s.description = "Grok is currently the best way in logstash to parse crappy unstructured log data into something structured and queryable."
+   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
    s.authors = ["Elasticsearch"]
-   s.email = 'richard.pijnenburg@elasticsearch.com'
-   s.homepage = "http://logstash.net/"
+   s.email = 'info@elasticsearch.com'
+   s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html"
    s.require_paths = ["lib"]

    # Files
@@ -17,13 +17,15 @@ Gem::Specification.new do |s|
    s.test_files = s.files.grep(%r{^(test|spec|features)/})

    # Special flag to let us know this is actually a logstash plugin
-   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }

    # Gem dependencies
    s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

    s.add_runtime_dependency 'jls-grok', ['0.11.0']
    s.add_runtime_dependency 'logstash-patterns-core'
+   s.add_development_dependency 'logstash-devutils'

+   s.add_development_dependency 'logstash-devutils'
  end

data/spec/filters/grok_spec.rb CHANGED
@@ -1,5 +1,5 @@
  # encoding: utf-8
- require "spec_helper"
+ require "logstash/devutils/rspec/spec_helper"
  require "logstash/filters/grok"

  describe LogStash::Filters::Grok do
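
Only the helper require changed here; the specs themselves now come from the shared `logstash-devutils` rspec helper. A minimal sketch of a spec in this file's style, assuming the `config`/`sample`/`insist` DSL provided by the logstash spec helpers (the pattern is the one from the plugin's documentation, not a spec that actually exists in this file):

    # encoding: utf-8
    require "logstash/devutils/rspec/spec_helper"
    require "logstash/filters/grok"

    describe LogStash::Filters::Grok do
      describe "a simple duration match" do
        config <<-CONFIG
          filter {
            grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
          }
        CONFIG

        # `sample` runs the event through the configured filter;
        # `subject` is the resulting event.
        sample "Duration: 3.44" do
          insist { subject["duration"] } == "3.44"
        end
      end
    end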
metadata CHANGED
@@ -1,66 +1,93 @@
  --- !ruby/object:Gem::Specification
  name: logstash-filter-grok
  version: !ruby/object:Gem::Version
-   version: 0.1.0
+   version: 0.1.2
  platform: ruby
  authors:
  - Elasticsearch
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-11-05 00:00:00.000000000 Z
+ date: 2014-11-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
-   name: logstash
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - '>='
        - !ruby/object:Gem::Version
          version: 1.4.0
      - - <
        - !ruby/object:Gem::Version
          version: 2.0.0
-   type: :runtime
+   name: logstash
    prerelease: false
+   type: :runtime
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - '>='
        - !ruby/object:Gem::Version
          version: 1.4.0
      - - <
        - !ruby/object:Gem::Version
          version: 2.0.0
  - !ruby/object:Gem::Dependency
-   name: jls-grok
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - '='
        - !ruby/object:Gem::Version
          version: 0.11.0
-   type: :runtime
+   name: jls-grok
    prerelease: false
+   type: :runtime
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - '='
        - !ruby/object:Gem::Version
          version: 0.11.0
  - !ruby/object:Gem::Dependency
-   name: logstash-patterns-core
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - '>='
        - !ruby/object:Gem::Version
          version: '0'
+   name: logstash-patterns-core
+   prerelease: false
    type: :runtime
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   name: logstash-devutils
+   prerelease: false
+   type: :development
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   name: logstash-devutils
    prerelease: false
+   type: :development
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - '>='
        - !ruby/object:Gem::Version
          version: '0'
- description: Grok is currently the best way in logstash to parse crappy unstructured
-   log data into something structured and queryable.
- email: richard.pijnenburg@elasticsearch.com
+ description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
+ email: info@elasticsearch.com
  executables: []
  extensions: []
  extra_rdoc_files: []
@@ -71,33 +98,31 @@ files:
  - Rakefile
  - lib/logstash/filters/grok.rb
  - logstash-filter-grok.gemspec
- - rakelib/publish.rake
- - rakelib/vendor.rake
  - spec/filters/grok_spec.rb
- homepage: http://logstash.net/
+ homepage: http://www.elasticsearch.org/guide/en/logstash/current/index.html
  licenses:
  - Apache License (2.0)
  metadata:
    logstash_plugin: 'true'
-   group: filter
- post_install_message:
+   logstash_group: filter
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>='
      - !ruby/object:Gem::Version
        version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>='
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.4.1
- signing_key:
+ rubyforge_project:
+ rubygems_version: 2.1.9
+ signing_key:
  specification_version: 4
  summary: Parse arbitrary text and structure it.
  test_files:
data/rakelib/publish.rake DELETED
@@ -1,9 +0,0 @@
- require "gem_publisher"
-
- desc "Publish gem to RubyGems.org"
- task :publish_gem do |t|
-   gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
-   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
-   puts "Published #{gem}" if gem
- end
-
data/rakelib/vendor.rake DELETED
@@ -1,169 +0,0 @@
- require "net/http"
- require "uri"
- require "digest/sha1"
-
- def vendor(*args)
-   return File.join("vendor", *args)
- end
-
- directory "vendor/" => ["vendor"] do |task, args|
-   mkdir task.name
- end
-
- def fetch(url, sha1, output)
-
-   puts "Downloading #{url}"
-   actual_sha1 = download(url, output)
-
-   if actual_sha1 != sha1
-     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
-   end
- end # def fetch
-
- def file_fetch(url, sha1)
-   filename = File.basename( URI(url).path )
-   output = "vendor/#{filename}"
-   task output => [ "vendor/" ] do
-     begin
-       actual_sha1 = file_sha1(output)
-       if actual_sha1 != sha1
-         fetch(url, sha1, output)
-       end
-     rescue Errno::ENOENT
-       fetch(url, sha1, output)
-     end
-   end.invoke
-
-   return output
- end
-
- def file_sha1(path)
-   digest = Digest::SHA1.new
-   fd = File.new(path, "r")
-   while true
-     begin
-       digest << fd.sysread(16384)
-     rescue EOFError
-       break
-     end
-   end
-   return digest.hexdigest
- ensure
-   fd.close if fd
- end
-
- def download(url, output)
-   uri = URI(url)
-   digest = Digest::SHA1.new
-   tmp = "#{output}.tmp"
-   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
-     request = Net::HTTP::Get.new(uri.path)
-     http.request(request) do |response|
-       fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
-       size = (response["content-length"].to_i || -1).to_f
-       count = 0
-       File.open(tmp, "w") do |fd|
-         response.read_body do |chunk|
-           fd.write(chunk)
-           digest << chunk
-           if size > 0 && $stdout.tty?
-             count += chunk.bytesize
-             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
-           end
-         end
-       end
-       $stdout.write("\r \r") if $stdout.tty?
-     end
-   end
-
-   File.rename(tmp, output)
-
-   return digest.hexdigest
- rescue SocketError => e
-   puts "Failure while downloading #{url}: #{e}"
-   raise
- ensure
-   File.unlink(tmp) if File.exist?(tmp)
- end # def download
-
- def untar(tarball, &block)
-   require "archive/tar/minitar"
-   tgz = Zlib::GzipReader.new(File.open(tarball))
-   # Pull out typesdb
-   tar = Archive::Tar::Minitar::Input.open(tgz)
-   tar.each do |entry|
-     path = block.call(entry)
-     next if path.nil?
-     parent = File.dirname(path)
-
-     mkdir_p parent unless File.directory?(parent)
-
-     # Skip this file if the output file is the same size
-     if entry.directory?
-       mkdir path unless File.directory?(path)
-     else
-       entry_mode = entry.instance_eval { @mode } & 0777
-       if File.exists?(path)
-         stat = File.stat(path)
-         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
-         # expose headers in the entry.
-         entry_size = entry.instance_eval { @size }
-         # If file sizes are same, skip writing.
-         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
-       end
-       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
-       File.open(path, "w") do |fd|
-         # eof? check lets us skip empty files. Necessary because the API provided by
-         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
-         # IO object. Something about empty files in this EntryStream causes
-         # IO.copy_stream to throw "can't convert nil into String" on JRuby
-         # TODO(sissel): File a bug about this.
-         while !entry.eof?
-           chunk = entry.read(16384)
-           fd.write(chunk)
-         end
-         #IO.copy_stream(entry, fd)
-       end
-       File.chmod(entry_mode, path)
-     end
-   end
-   tar.close
-   File.unlink(tarball) if File.file?(tarball)
- end # def untar
-
- def ungz(file)
-
-   outpath = file.gsub('.gz', '')
-   tgz = Zlib::GzipReader.new(File.open(file))
-   begin
-     File.open(outpath, "w") do |out|
-       IO::copy_stream(tgz, out)
-     end
-     File.unlink(file)
-   rescue
-     File.unlink(outpath) if File.file?(outpath)
-     raise
-   end
-   tgz.close
- end
-
- desc "Process any vendor files required for this plugin"
- task "vendor" do |task, args|
-
-   @files.each do |file|
-     download = file_fetch(file['url'], file['sha1'])
-     if download =~ /.tar.gz/
-       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
-       untar(download) do |entry|
-         if !file['files'].nil?
-           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
-           out = entry.full_name.split("/").last
-         end
-         File.join('vendor', out)
-       end
-     elsif download =~ /.gz/
-       ungz(download)
-     end
-   end
-
- end
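
For context, the deleted vendor task iterated over an `@files` list that the old Rakefile initialized to `[]`. A hypothetical entry in that list, using only the keys the task body reads (`url`, `sha1`, `files`); the URL, checksum, and file name here are placeholders, not values any version of this gem shipped:

    # Hypothetical @files entry consumed by the deleted "vendor" task.
    @files = [
      {
        'url'   => 'https://example.com/some-vendor-data.tar.gz',  # placeholder URL
        'sha1'  => '0123456789abcdef0123456789abcdef01234567',     # expected checksum
        'files' => ['/some-file.dat'],                             # paths to keep from the tarball
      },
    ]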