logstash-filter-grok 4.0.4 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Gemfile +6 -0
- data/LICENSE +199 -10
- data/README.md +1 -1
- data/docs/index.asciidoc +61 -0
- data/lib/logstash/filters/grok.rb +186 -55
- data/logstash-filter-grok.gemspec +4 -4
- data/spec/filters/grok_performance_spec.rb +144 -0
- data/spec/filters/grok_spec.rb +607 -630
- data/spec/spec_helper.rb +19 -0
- metadata +32 -10
- data/lib/logstash/filters/grok/timeout_enforcer.rb +0 -72
- data/lib/logstash/filters/grok/timeout_exception.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87eff1d28b6fd87d466b877482c6a81f7c831085fa412648e99b06b01f63bdb7
|
4
|
+
data.tar.gz: c97a71386c92e197718de56e99d7d93da9be6c9106b268d87f0fce7679aa320e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3542aeedd78021a4d27060a8cb28f2f4818d0b898f18ad1bc756f9fda7ca80bf07876bbff07d78d7390388c0cab6a6a939fdaea66d81d02f10a7f896c77fc461
|
7
|
+
data.tar.gz: 344c2f04142ae37a4cf1b5fafca6d3c592b80eac15829a5bd342c550b40b3204acaf2e0c3e8ffd2cf0a310a269c7caa8492c3ca4b1178cd0d505cbc9cb6c6c7a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## 4.4.0
|
2
|
+
- Feat: ECS compatibility support [#162](https://github.com/logstash-plugins/logstash-filter-grok/pull/162)
|
3
|
+
|
4
|
+
The filter supports using (built-in) pattern definitions that are fully Elastic Common Schema compliant.
|
5
|
+
|
6
|
+
## 4.3.0
|
7
|
+
- Added: added target support [#156](https://github.com/logstash-plugins/logstash-filter-grok/pull/156)
|
8
|
+
|
9
|
+
## 4.2.0
|
10
|
+
- Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153)
|
11
|
+
|
12
|
+
## 4.1.1
|
13
|
+
- Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148)
|
14
|
+
|
15
|
+
## 4.1.0
|
16
|
+
- Changed timeout handling using the Timeout class [#147](https://github.com/logstash-plugins/logstash-filter-grok/pull/147)
|
17
|
+
|
1
18
|
## 4.0.4
|
2
19
|
- Added info and link to documentation for logstash-filter-dissect as another option for extracting unstructured event data into fields
|
3
20
|
[#144](https://github.com/logstash-plugins/logstash-filter-grok/issues/144)
|
data/Gemfile
CHANGED
@@ -9,3 +9,9 @@ if Dir.exist?(logstash_path) && use_logstash_source
|
|
9
9
|
gem 'logstash-core', :path => "#{logstash_path}/logstash-core"
|
10
10
|
gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api"
|
11
11
|
end
|
12
|
+
|
13
|
+
group :test do
|
14
|
+
gem 'rspec-benchmark', :require => false if RUBY_VERSION >= '2.3'
|
15
|
+
gem 'logstash-input-generator', :require => false
|
16
|
+
gem 'logstash-output-null', :require => false
|
17
|
+
end
|
data/LICENSE
CHANGED
@@ -1,13 +1,202 @@
|
|
1
|
-
Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
|
2
1
|
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
Apache License
|
3
|
+
Version 2.0, January 2004
|
4
|
+
http://www.apache.org/licenses/
|
6
5
|
|
7
|
-
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
8
7
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
8
|
+
1. Definitions.
|
9
|
+
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12
|
+
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14
|
+
the copyright owner that is granting the License.
|
15
|
+
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
17
|
+
other entities that control, are controlled by, or are under common
|
18
|
+
control with that entity. For the purposes of this definition,
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
20
|
+
direction or management of such entity, whether by contract or
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23
|
+
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25
|
+
exercising permissions granted by this License.
|
26
|
+
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
28
|
+
including but not limited to software source code, documentation
|
29
|
+
source, and configuration files.
|
30
|
+
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
32
|
+
transformation or translation of a Source form, including but
|
33
|
+
not limited to compiled object code, generated documentation,
|
34
|
+
and conversions to other media types.
|
35
|
+
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
37
|
+
Object form, made available under the License, as indicated by a
|
38
|
+
copyright notice that is included in or attached to the work
|
39
|
+
(an example is provided in the Appendix below).
|
40
|
+
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47
|
+
the Work and Derivative Works thereof.
|
48
|
+
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
50
|
+
the original version of the Work and any modifications or additions
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
62
|
+
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
65
|
+
subsequently incorporated within the Work.
|
66
|
+
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
73
|
+
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79
|
+
where such license applies only to those patent claims licensable
|
80
|
+
by such Contributor that are necessarily infringed by their
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
83
|
+
institute patent litigation against any entity (including a
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
86
|
+
or contributory patent infringement, then any patent licenses
|
87
|
+
granted to You under this License for that Work shall terminate
|
88
|
+
as of the date such litigation is filed.
|
89
|
+
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
92
|
+
modifications, and in Source or Object form, provided that You
|
93
|
+
meet the following conditions:
|
94
|
+
|
95
|
+
(a) You must give any other recipients of the Work or
|
96
|
+
Derivative Works a copy of this License; and
|
97
|
+
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
99
|
+
stating that You changed the files; and
|
100
|
+
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
103
|
+
attribution notices from the Source form of the Work,
|
104
|
+
excluding those notices that do not pertain to any part of
|
105
|
+
the Derivative Works; and
|
106
|
+
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
109
|
+
include a readable copy of the attribution notices contained
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
112
|
+
of the following places: within a NOTICE text file distributed
|
113
|
+
as part of the Derivative Works; within the Source form or
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
115
|
+
within a display generated by the Derivative Works, if and
|
116
|
+
wherever such third-party notices normally appear. The contents
|
117
|
+
of the NOTICE file are for informational purposes only and
|
118
|
+
do not modify the License. You may add Your own attribution
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
121
|
+
that such additional attribution notices cannot be construed
|
122
|
+
as modifying the License.
|
123
|
+
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
125
|
+
may provide additional or different license terms and conditions
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
129
|
+
the conditions stated in this License.
|
130
|
+
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
134
|
+
this License, without any additional terms or conditions.
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136
|
+
the terms of any separate license agreement you may have executed
|
137
|
+
with Licensor regarding such Contributions.
|
138
|
+
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
141
|
+
except as required for reasonable and customary use in describing the
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
143
|
+
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
153
|
+
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
159
|
+
incidental, or consequential damages of any character arising as a
|
160
|
+
result of this License or out of the use or inability to use the
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
163
|
+
other commercial damages or losses), even if such Contributor
|
164
|
+
has been advised of the possibility of such damages.
|
165
|
+
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169
|
+
or other liability obligations and/or rights consistent with this
|
170
|
+
License. However, in accepting such obligations, You may act only
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
175
|
+
of your accepting any such warranty or additional liability.
|
176
|
+
|
177
|
+
END OF TERMS AND CONDITIONS
|
178
|
+
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
180
|
+
|
181
|
+
To apply the Apache License to your work, attach the following
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
+
replaced with your own identifying information. (Don't include
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
185
|
+
comment syntax for the file format. We also recommend that a
|
186
|
+
file or class name and description of purpose be included on the
|
187
|
+
same "printed page" as the copyright notice for easier
|
188
|
+
identification within third-party archives.
|
189
|
+
|
190
|
+
Copyright 2020 Elastic and contributors
|
191
|
+
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
+
you may not use this file except in compliance with the License.
|
194
|
+
You may obtain a copy of the License at
|
195
|
+
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
+
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
+
See the License for the specific language governing permissions and
|
202
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Logstash Plugin
|
2
2
|
|
3
|
-
[![Travis Build Status](https://travis-ci.
|
3
|
+
[![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-filter-grok.svg)](https://travis-ci.com/logstash-plugins/logstash-filter-grok)
|
4
4
|
|
5
5
|
This is a plugin for [Logstash](https://github.com/elastic/logstash).
|
6
6
|
|
data/docs/index.asciidoc
CHANGED
@@ -143,15 +143,21 @@ For example, doing the postfix queue id example as above:
|
|
143
143
|
|
144
144
|
Then use the `patterns_dir` setting in this plugin to tell logstash where
|
145
145
|
your custom patterns directory is. Here's a full example with a sample log:
|
146
|
+
|
146
147
|
[source,ruby]
|
148
|
+
-----
|
147
149
|
Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
|
150
|
+
-----
|
151
|
+
|
148
152
|
[source,ruby]
|
153
|
+
-----
|
149
154
|
filter {
|
150
155
|
grok {
|
151
156
|
patterns_dir => ["./patterns"]
|
152
157
|
match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
|
153
158
|
}
|
154
159
|
}
|
160
|
+
-----
|
155
161
|
|
156
162
|
The above will match and result in the following fields:
|
157
163
|
|
@@ -169,6 +175,21 @@ Another option is to define patterns _inline_ in the filter using `pattern_defin
|
|
169
175
|
This is mostly for convenience and allows the user to define a pattern which can be used just in that
|
170
176
|
filter. These newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter.
|
171
177
|
|
178
|
+
[id="plugins-{type}s-{plugin}-ecs"]
|
179
|
+
==== Migrating to Elastic Common Schema (ECS)
|
180
|
+
|
181
|
+
To ease migration to the {ecs-ref}[Elastic Common Schema (ECS)], the filter
|
182
|
+
plugin offers a new set of ECS-compliant patterns in addition to the existing
|
183
|
+
patterns. The new ECS pattern definitions capture event field names that are
|
184
|
+
compliant with the schema.
|
185
|
+
|
186
|
+
The ECS pattern set has all of the pattern definitions from the legacy set, and is
|
187
|
+
a drop-in replacement. Use the <<plugins-{type}s-{plugin}-ecs_compatibility>>
|
188
|
+
setting to switch modes.
|
189
|
+
|
190
|
+
New features and enhancements will be added to the ECS-compliant files. The
|
191
|
+
legacy patterns may still receive bug fixes which are backwards compatible.
|
192
|
+
|
172
193
|
|
173
194
|
[id="plugins-{type}s-{plugin}-options"]
|
174
195
|
==== Grok Filter Configuration Options
|
@@ -179,6 +200,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
179
200
|
|=======================================================================
|
180
201
|
|Setting |Input type|Required
|
181
202
|
| <<plugins-{type}s-{plugin}-break_on_match>> |<<boolean,boolean>>|No
|
203
|
+
| <<plugins-{type}s-{plugin}-ecs_compatibility>> |<<string,string>>|No
|
182
204
|
| <<plugins-{type}s-{plugin}-keep_empty_captures>> |<<boolean,boolean>>|No
|
183
205
|
| <<plugins-{type}s-{plugin}-match>> |<<hash,hash>>|No
|
184
206
|
| <<plugins-{type}s-{plugin}-named_captures_only>> |<<boolean,boolean>>|No
|
@@ -189,6 +211,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
189
211
|
| <<plugins-{type}s-{plugin}-tag_on_failure>> |<<array,array>>|No
|
190
212
|
| <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
|
191
213
|
| <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
|
214
|
+
| <<plugins-{type}s-{plugin}-timeout_scope>> |<<string,string>>|No
|
192
215
|
|=======================================================================
|
193
216
|
|
194
217
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -206,6 +229,20 @@ Break on first match. The first successful match by grok will result in the
|
|
206
229
|
filter being finished. If you want grok to try all patterns (maybe you are
|
207
230
|
parsing different things), then set this to false.
|
208
231
|
|
232
|
+
[id="plugins-{type}s-{plugin}-ecs_compatibility"]
|
233
|
+
===== `ecs_compatibility`
|
234
|
+
|
235
|
+
* Value type is <<string,string>>
|
236
|
+
* Supported values are:
|
237
|
+
** `disabled`: the plugin will load legacy (built-in) pattern definitions
|
238
|
+
** `v1`: all patterns provided by the plugin will use ECS compliant captures
|
239
|
+
* Default value depends on which version of Logstash is running:
|
240
|
+
** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default
|
241
|
+
** Otherwise, the default value is `disabled`.
|
242
|
+
|
243
|
+
Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)].
|
244
|
+
The value of this setting affects extracted event field names when a composite pattern (such as `HTTPD_COMMONLOG`) is matched.
|
245
|
+
|
209
246
|
[id="plugins-{type}s-{plugin}-keep_empty_captures"]
|
210
247
|
===== `keep_empty_captures`
|
211
248
|
|
@@ -338,6 +375,14 @@ successful match
|
|
338
375
|
|
339
376
|
Tag to apply if a grok regexp times out.
|
340
377
|
|
378
|
+
[id="plugins-{type}s-{plugin}-target"]
|
379
|
+
===== `target`
|
380
|
+
|
381
|
+
* Value type is <<string,string>>
|
382
|
+
* There is no default value for this setting
|
383
|
+
|
384
|
+
Define target namespace for placing matches.
|
385
|
+
|
341
386
|
[id="plugins-{type}s-{plugin}-timeout_millis"]
|
342
387
|
===== `timeout_millis`
|
343
388
|
|
@@ -350,6 +395,22 @@ This will never timeout early, but may take a little longer to timeout.
|
|
350
395
|
Actual timeout is approximate based on a 250ms quantization.
|
351
396
|
Set to 0 to disable timeouts
|
352
397
|
|
398
|
+
[id="plugins-{type}s-{plugin}-timeout_scope"]
|
399
|
+
===== `timeout_scope`
|
400
|
+
|
401
|
+
* Value type is <<string,string>>
|
402
|
+
* Default value is `"pattern"`
|
403
|
+
* Supported values are `"pattern"` and `"event"`
|
404
|
+
|
405
|
+
When multiple patterns are provided to <<plugins-{type}s-{plugin}-match>>,
|
406
|
+
the timeout has historically applied to _each_ pattern, incurring overhead
|
407
|
+
for each and every pattern that is attempted; when the grok filter is
|
408
|
+
configured with `timeout_scope => event`, the plugin instead enforces
|
409
|
+
a single timeout across all attempted matches on the event, so it can
|
410
|
+
achieve a similar safeguard against runaway matchers with significantly
|
411
|
+
less overhead.
|
412
|
+
|
413
|
+
It's usually better to scope the timeout for the whole event.
|
353
414
|
|
354
415
|
|
355
416
|
[id="plugins-{type}s-{plugin}-common-options"]
|
@@ -3,8 +3,9 @@
|
|
3
3
|
require "logstash/namespace"
|
4
4
|
require "logstash/environment"
|
5
5
|
require "logstash/patterns/core"
|
6
|
+
require 'logstash/plugin_mixins/ecs_compatibility_support'
|
6
7
|
require "grok-pure" # rubygem 'jls-grok'
|
7
|
-
require "
|
8
|
+
require "timeout"
|
8
9
|
|
9
10
|
# Parse arbitrary text and structure it.
|
10
11
|
#
|
@@ -139,13 +140,13 @@
|
|
139
140
|
# `SYSLOGBASE` pattern which itself is defined by other patterns.
|
140
141
|
#
|
141
142
|
# Another option is to define patterns _inline_ in the filter using `pattern_definitions`.
|
142
|
-
# This is mostly for convenience and allows user to define a pattern which can be used just in that
|
143
|
+
# This is mostly for convenience and allows user to define a pattern which can be used just in that
|
143
144
|
# filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter.
|
144
145
|
#
|
145
146
|
class LogStash::Filters::Grok < LogStash::Filters::Base
|
147
|
+
include LogStash::PluginMixins::ECSCompatibilitySupport
|
148
|
+
|
146
149
|
config_name "grok"
|
147
|
-
require "logstash/filters/grok/timeout_enforcer"
|
148
|
-
require "logstash/filters/grok/timeout_exception"
|
149
150
|
|
150
151
|
# A hash of matches of field => value
|
151
152
|
#
|
@@ -169,7 +170,7 @@
|
|
169
170
|
# necessarily need to define this yourself unless you are adding additional
|
170
171
|
# patterns. You can point to multiple pattern directories using this setting.
|
171
172
|
# Note that Grok will read all files in the directory matching the patterns_files_glob
|
172
|
-
# and assume it's a pattern file (including any tilde backup files).
|
173
|
+
# and assume it's a pattern file (including any tilde backup files).
|
173
174
|
# [source,ruby]
|
174
175
|
# patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"]
|
175
176
|
#
|
@@ -205,6 +206,10 @@
|
|
205
206
|
# If `true`, keep empty captures as event fields.
|
206
207
|
config :keep_empty_captures, :validate => :boolean, :default => false
|
207
208
|
|
209
|
+
# Define the target field for placing the matched captures.
|
210
|
+
# If this setting is omitted, data gets stored at the root (top level) of the event.
|
211
|
+
config :target, :validate => :string
|
212
|
+
|
208
213
|
# Append values to the `tags` field when there has been no
|
209
214
|
# successful match
|
210
215
|
config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
|
@@ -216,6 +221,16 @@
|
|
216
221
|
# Set to 0 to disable timeouts
|
217
222
|
config :timeout_millis, :validate => :number, :default => 30000
|
218
223
|
|
224
|
+
# When multiple patterns are provided to `match`,
|
225
|
+
# the timeout has historically applied to _each_ pattern, incurring overhead
|
226
|
+
# for each and every pattern that is attempted; when the grok filter is
|
227
|
+
# configured with `timeout_scope => 'event'`, the plugin instead enforces
|
228
|
+
# a single timeout across all attempted matches on the event, so it can
|
229
|
+
# achieve similar safeguard against runaway matchers with significantly
|
230
|
+
# less overhead.
|
231
|
+
# It's usually better to scope the timeout for the whole event.
|
232
|
+
config :timeout_scope, :validate => %w(pattern event), :default => "pattern"
|
233
|
+
|
219
234
|
# Tag to apply if a grok regexp times out.
|
220
235
|
config :tag_on_timeout, :validate => :string, :default => '_groktimeout'
|
221
236
|
|
@@ -237,28 +252,14 @@
|
|
237
252
|
# will be parsed and `hello world` will overwrite the original message.
|
238
253
|
config :overwrite, :validate => :array, :default => []
|
239
254
|
|
240
|
-
attr_reader :timeout_enforcer
|
241
|
-
|
242
|
-
# Register default pattern paths
|
243
|
-
@@patterns_path ||= Set.new
|
244
|
-
@@patterns_path += [
|
245
|
-
LogStash::Patterns::Core.path,
|
246
|
-
LogStash::Environment.pattern_path("*")
|
247
|
-
]
|
248
|
-
|
249
|
-
public
|
250
255
|
def register
|
251
256
|
# a cache of capture name handler methods.
|
252
257
|
@handlers = {}
|
253
258
|
|
254
|
-
@timeout_enforcer = TimeoutEnforcer.new(@logger, @timeout_millis * 1000000)
|
255
|
-
@timeout_enforcer.start! unless @timeout_millis == 0
|
256
|
-
|
257
259
|
@patternfiles = []
|
258
|
-
|
259
|
-
#
|
260
|
-
|
261
|
-
@patternfiles += patterns_files_from_paths(@@patterns_path.to_a, "*")
|
260
|
+
# Have (default) patterns_path show first. Last-in pattern definitions wins
|
261
|
+
# this will let folks redefine built-in patterns at runtime
|
262
|
+
@patternfiles += patterns_files_from_paths(patterns_path, "*")
|
262
263
|
@patternfiles += patterns_files_from_paths(@patterns_dir, @patterns_files_glob)
|
263
264
|
|
264
265
|
@patterns = Hash.new { |h,k| h[k] = [] }
|
@@ -271,11 +272,11 @@
|
|
271
272
|
patterns = [patterns] if patterns.is_a?(String)
|
272
273
|
@metric_match_fields.gauge(field, patterns.length)
|
273
274
|
|
274
|
-
@logger.trace("Grok compile", :field => field, :patterns => patterns)
|
275
|
+
@logger.trace? && @logger.trace("Grok compile", :field => field, :patterns => patterns)
|
275
276
|
patterns.each do |pattern|
|
276
|
-
@logger.debug?
|
277
|
+
@logger.debug? && @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
|
277
278
|
grok = Grok.new
|
278
|
-
grok.logger = @logger
|
279
|
+
grok.logger = @logger
|
279
280
|
add_patterns_from_files(@patternfiles, grok)
|
280
281
|
add_patterns_from_inline_definition(@pattern_definitions, grok)
|
281
282
|
grok.compile(pattern, @named_captures_only)
|
@@ -284,21 +285,24 @@
|
|
284
285
|
end # @match.each
|
285
286
|
@match_counter = metric.counter(:matches)
|
286
287
|
@failure_counter = metric.counter(:failures)
|
288
|
+
|
289
|
+
@target = "[#{@target.strip}]" if @target && @target !~ /\[.*?\]/
|
290
|
+
|
291
|
+
@timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE
|
292
|
+
@matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self)
|
287
293
|
end # def register
|
288
294
|
|
289
|
-
public
|
290
295
|
def filter(event)
|
291
296
|
matched = false
|
292
297
|
|
293
|
-
@logger.debug?
|
298
|
+
@logger.debug? && @logger.debug("Running grok filter", :event => event.to_hash)
|
294
299
|
|
295
300
|
@patterns.each do |field, groks|
|
296
301
|
if match(groks, field, event)
|
297
302
|
matched = true
|
298
303
|
break if @break_on_match
|
299
304
|
end
|
300
|
-
|
301
|
-
end # @patterns.each
|
305
|
+
end
|
302
306
|
|
303
307
|
if matched
|
304
308
|
@match_counter.increment(1)
|
@@ -308,14 +312,36 @@
|
|
308
312
|
@tag_on_failure.each {|tag| event.tag(tag)}
|
309
313
|
end
|
310
314
|
|
311
|
-
@logger.debug?
|
312
|
-
rescue
|
315
|
+
@logger.debug? && @logger.debug("Event now: ", :event => event.to_hash)
|
316
|
+
rescue GrokTimeoutException => e
|
313
317
|
@logger.warn(e.message)
|
314
318
|
metric.increment(:timeouts)
|
315
319
|
event.tag(@tag_on_timeout)
|
316
320
|
end # def filter
|
317
321
|
|
322
|
+
def close
|
323
|
+
end
|
324
|
+
|
318
325
|
private
|
326
|
+
|
327
|
+
# The default pattern paths, depending on environment.
|
328
|
+
def patterns_path
|
329
|
+
patterns_path = []
|
330
|
+
case ecs_compatibility
|
331
|
+
when :disabled
|
332
|
+
patterns_path << LogStash::Patterns::Core.path # :legacy
|
333
|
+
when :v1
|
334
|
+
patterns_path << LogStash::Patterns::Core.path('ecs-v1')
|
335
|
+
else
|
336
|
+
fail(NotImplementedError, "ECS #{ecs_compatibility} is not supported by this plugin.")
|
337
|
+
end
|
338
|
+
# allow plugin to be instantiated outside the LS environment (in tests)
|
339
|
+
if defined? LogStash::Environment.pattern_path
|
340
|
+
patterns_path << LogStash::Environment.pattern_path("*")
|
341
|
+
end
|
342
|
+
patterns_path
|
343
|
+
end
|
344
|
+
|
319
345
|
def match(groks, field, event)
|
320
346
|
input = event.get(field)
|
321
347
|
if input.is_a?(Array)
|
@@ -328,52 +354,92 @@
|
|
328
354
|
match_against_groks(groks, field, input, event)
|
329
355
|
end
|
330
356
|
rescue StandardError => e
|
331
|
-
@logger.warn("Grok regexp threw exception", :
|
357
|
+
@logger.warn("Grok regexp threw exception", :message => e.message, :exception => e.class, :backtrace => e.backtrace)
|
332
358
|
return false
|
333
359
|
end
|
334
|
-
|
335
|
-
private
|
360
|
+
|
336
361
|
def match_against_groks(groks, field, input, event)
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
362
|
+
# Convert anything else to string (number, hash, etc)
|
363
|
+
context = GrokContext.new(field, input.to_s)
|
364
|
+
@matcher.match(context, groks, event, @break_on_match)
|
365
|
+
end
|
341
366
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
367
|
+
# Internal (base) helper to handle the global timeout switch.
|
368
|
+
# @private
|
369
|
+
class Matcher
|
370
|
+
|
371
|
+
def initialize(filter)
|
372
|
+
@filter = filter
|
373
|
+
end
|
374
|
+
|
375
|
+
def match(context, groks, event, break_on_match)
|
376
|
+
matched = false
|
377
|
+
|
378
|
+
groks.each do |grok|
|
379
|
+
context.set_grok(grok)
|
380
|
+
|
381
|
+
matched = execute(context, grok)
|
382
|
+
if matched
|
383
|
+
grok.capture(matched) { |field, value| @filter.handle(field, value, event) }
|
384
|
+
break if break_on_match
|
385
|
+
end
|
346
386
|
end
|
387
|
+
|
388
|
+
matched
|
389
|
+
end
|
390
|
+
|
391
|
+
protected
|
392
|
+
|
393
|
+
def execute(context, grok)
|
394
|
+
grok.execute(context.input)
|
395
|
+
end
|
396
|
+
|
397
|
+
end
|
398
|
+
|
399
|
+
# @private
|
400
|
+
class EventTimeoutMatcher < Matcher
|
401
|
+
# @override
|
402
|
+
def match(context, groks, event, break_on_match)
|
403
|
+
@filter.with_timeout(context) { super }
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
# @private
|
408
|
+
class PatternTimeoutMatcher < Matcher
|
409
|
+
# @override
|
410
|
+
def execute(context, grok)
|
411
|
+
@filter.with_timeout(context) { super }
|
347
412
|
end
|
348
|
-
|
349
|
-
matched
|
350
413
|
end
|
351
414
|
|
352
|
-
private
|
353
415
|
def handle(field, value, event)
|
354
416
|
return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
|
355
417
|
|
418
|
+
target_field = @target ? "#{@target}[#{field}]" : field
|
419
|
+
|
356
420
|
if @overwrite.include?(field)
|
357
|
-
event.set(
|
421
|
+
event.set(target_field, value)
|
358
422
|
else
|
359
|
-
v = event.get(
|
423
|
+
v = event.get(target_field)
|
360
424
|
if v.nil?
|
361
|
-
event.set(
|
425
|
+
event.set(target_field, value)
|
362
426
|
elsif v.is_a?(Array)
|
363
427
|
# do not replace the code below with:
|
364
428
|
# event[field] << value
|
365
429
|
# this assumes implementation specific feature of returning a mutable object
|
366
430
|
# from a field ref which should not be assumed and will change in the future.
|
367
431
|
v << value
|
368
|
-
event.set(
|
432
|
+
event.set(target_field, v)
|
369
433
|
elsif v.is_a?(String)
|
370
434
|
# Promote to array since we aren't overwriting.
|
371
|
-
event.set(
|
435
|
+
event.set(target_field, [v, value])
|
436
|
+
else
|
437
|
+
@logger.debug("Not adding matched value - found existing (#{v.class})", :field => target_field, :value => value)
|
372
438
|
end
|
373
439
|
end
|
374
440
|
end
|
441
|
+
public :handle
|
375
442
|
|
376
|
-
private
|
377
443
|
def patterns_files_from_paths(paths, glob)
|
378
444
|
patternfiles = []
|
379
445
|
@logger.debug("Grok patterns path", :paths => paths)
|
@@ -394,7 +460,6 @@
|
|
394
460
|
patternfiles
|
395
461
|
end # def patterns_files_from_paths
|
396
462
|
|
397
|
-
private
|
398
463
|
def add_patterns_from_files(paths, grok)
|
399
464
|
paths.each do |path|
|
400
465
|
if !File.exists?(path)
|
@@ -404,7 +469,6 @@
|
|
404
469
|
end
|
405
470
|
end # def add_patterns_from_files
|
406
471
|
|
407
|
-
private
|
408
472
|
def add_patterns_from_inline_definition(pattern_definitions, grok)
|
409
473
|
pattern_definitions.each do |name, pattern|
|
410
474
|
next if pattern.nil?
|
@@ -412,8 +476,75 @@
|
|
412
476
|
end
|
413
477
|
end
|
414
478
|
|
415
|
-
|
416
|
-
|
479
|
+
class TimeoutError < RuntimeError; end
|
480
|
+
|
481
|
+
class GrokTimeoutException < Exception
|
482
|
+
attr_reader :grok, :field, :value
|
483
|
+
|
484
|
+
def initialize(grok, field, value)
|
485
|
+
@grok = grok
|
486
|
+
@field = field
|
487
|
+
@value = value
|
488
|
+
end
|
489
|
+
|
490
|
+
def message
|
491
|
+
"Timeout executing grok '#{@grok.pattern}' against field '#{field}' with value '#{trunc_value}'!"
|
492
|
+
end
|
493
|
+
|
494
|
+
def trunc_value
|
495
|
+
if value.size <= 255 # If no more than 255 chars
|
496
|
+
value
|
497
|
+
else
|
498
|
+
"Value too large to output (#{value.bytesize} bytes)! First 255 chars are: #{value[0..255]}"
|
499
|
+
end
|
500
|
+
end
|
417
501
|
end
|
418
502
|
|
503
|
+
def with_timeout(context, &block)
|
504
|
+
@timeout.exec(&block)
|
505
|
+
rescue TimeoutError => error
|
506
|
+
handle_timeout(context, error)
|
507
|
+
end
|
508
|
+
public :with_timeout
|
509
|
+
|
510
|
+
def handle_timeout(context, error)
|
511
|
+
raise GrokTimeoutException.new(context.grok, context.field, context.input)
|
512
|
+
end
|
513
|
+
|
514
|
+
# @private
|
515
|
+
class GrokContext
|
516
|
+
attr_reader :grok, :field, :input
|
517
|
+
|
518
|
+
def initialize(field, input)
|
519
|
+
@field = field
|
520
|
+
@input = input
|
521
|
+
end
|
522
|
+
|
523
|
+
def set_grok(grok)
|
524
|
+
@grok = grok
|
525
|
+
end
|
526
|
+
end
|
527
|
+
|
528
|
+
# @private
|
529
|
+
class NoopTimeout
|
530
|
+
INSTANCE = new
|
531
|
+
|
532
|
+
def exec
|
533
|
+
yield
|
534
|
+
end
|
535
|
+
end
|
536
|
+
|
537
|
+
# @private
|
538
|
+
class RubyTimeout
|
539
|
+
def initialize(timeout_millis)
|
540
|
+
# divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying
|
541
|
+
# executor resolution is in microseconds so fractional second parameter down to microseconds is possible.
|
542
|
+
# see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
|
543
|
+
@timeout_seconds = timeout_millis / 1000.0
|
544
|
+
end
|
545
|
+
|
546
|
+
def exec(&block)
|
547
|
+
Timeout.timeout(@timeout_seconds, TimeoutError, &block)
|
548
|
+
end
|
549
|
+
end
|
419
550
|
end # class LogStash::Filters::Grok
|