logstash-filter-translate 3.1.0 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters module FetchStrategy module File
4
+ class Exact
5
+ def initialize(dictionary, rw_lock)
6
+ @dictionary = dictionary
7
+ @read_lock = rw_lock.readLock
8
+ end
9
+
10
+ def dictionary_updated
11
+ end
12
+
13
+ def fetch(source, results)
14
+ @read_lock.lock
15
+ begin
16
+ if @dictionary.include?(source)
17
+ results[1] = LogStash::Util.deep_clone(@dictionary[source])
18
+ else
19
+ results[0] = false
20
+ end
21
+ ensure
22
+ @read_lock.unlock
23
+ end
24
+ end
25
+ end
26
+
27
+ class ExactRegex
28
+ def initialize(dictionary, rw_lock)
29
+ @keys_regex = Hash.new()
30
+ @dictionary = dictionary
31
+ @read_lock = rw_lock.readLock
32
+ end
33
+
34
+ def dictionary_updated
35
+ @keys_regex.clear
36
+ # rebuilding the regex map is time expensive
37
+ # 100 000 keys take 0.5 seconds on a high-spec MacBook Pro
38
+ # at least we are not doing it for every event like before
39
+ @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
40
+ end
41
+
42
+ def fetch(source, results)
43
+ @read_lock.lock
44
+ begin
45
+ key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
46
+ if key.nil?
47
+ results[0] = false
48
+ else
49
+ results[1] = LogStash::Util.deep_clone(@dictionary[key])
50
+ end
51
+ ensure
52
+ @read_lock.unlock
53
+ end
54
+ end
55
+ end
56
+
57
+ class RegexUnion
58
+ def initialize(dictionary, rw_lock)
59
+ @dictionary = dictionary
60
+ @read_lock = rw_lock.readLock
61
+ end
62
+
63
+ def dictionary_updated
64
+ @union_regex_keys = Regexp.union(@dictionary.keys)
65
+ end
66
+
67
+ def fetch(source, results)
68
+ @read_lock.lock
69
+ begin
70
+ value = source.gsub(@union_regex_keys, @dictionary)
71
+ if source == value
72
+ results[0] = false
73
+ else
74
+ results[1] = LogStash::Util.deep_clone(value)
75
+ end
76
+ ensure
77
+ @read_lock.unlock
78
+ end
79
+ end
80
+ end
81
+ end end end end
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters module FetchStrategy module Memory
4
+ class Exact
5
+ def initialize(dictionary)
6
+ @dictionary = dictionary
7
+ end
8
+
9
+ def fetch(source, results)
10
+ if @dictionary.include?(source)
11
+ results[1] = LogStash::Util.deep_clone(@dictionary[source])
12
+ else
13
+ results[0] = false
14
+ end
15
+ end
16
+ end
17
+
18
+ class ExactRegex
19
+ def initialize(dictionary)
20
+ @keys_regex = Hash.new()
21
+ @dictionary = dictionary
22
+ @dictionary.keys.each{|k| @keys_regex[k] = Regexp.new(k)}
23
+ end
24
+
25
+ def fetch(source, results)
26
+ key = @dictionary.keys.detect{|k| source.match(@keys_regex[k])}
27
+ if key.nil?
28
+ results[0] = false
29
+ else
30
+ results[1] = LogStash::Util.deep_clone(@dictionary[key])
31
+ end
32
+ end
33
+ end
34
+
35
+ class RegexUnion
36
+ def initialize(dictionary)
37
+ @dictionary = dictionary
38
+ @union_regex_keys = Regexp.union(@dictionary.keys)
39
+ end
40
+
41
+ def fetch(source, results)
42
+ value = source.gsub(@union_regex_keys, @dictionary)
43
+ if source == value
44
+ results[0] = false
45
+ else
46
+ results[1] = LogStash::Util.deep_clone(value)
47
+ end
48
+ end
49
+ end
50
+ end end end end
51
+
52
+
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters
4
+ class SingleValueUpdate
5
+ class CoerceString
6
+ def call(source) source; end
7
+ end
8
+ class CoerceArray
9
+ def call(source) source.first.to_s; end
10
+ end
11
+ class CoerceOther
12
+ def call(source) source.to_s end
13
+ end
14
+
15
+ def initialize(field, destination, fallback, lookup)
16
+ @field = field
17
+ @destination = destination
18
+ @fallback = fallback
19
+ @use_fallback = !fallback.nil? # fallback is not nil, the user set a value in the config
20
+ @lookup = lookup
21
+ @coercers_table = {}
22
+ @coercers_table.default = CoerceOther.new
23
+ @coercers_table[String] = CoerceString.new
24
+ @coercers_table[Array] = CoerceArray.new
25
+ end
26
+
27
+ def test_for_inclusion(event, override)
28
+ # Skip translation in case @destination field already exists and @override is disabled.
29
+ return false if !override && event.include?(@destination)
30
+ event.include?(@field)
31
+ end
32
+
33
+ def update(event)
34
+ # If source field is array use first value and make sure source value is string
35
+ # source = Array(event.get(@field)).first.to_s
36
+ source = event.get(@field)
37
+ source = @coercers_table[source.class].call(source)
38
+ matched = [true, nil]
39
+ @lookup.fetch_strategy.fetch(source, matched)
40
+ if matched.first
41
+ event.set(@destination, matched.last)
42
+ elsif @use_fallback
43
+ event.set(@destination, event.sprintf(@fallback))
44
+ matched[0] = true
45
+ end
46
+ return matched.first
47
+ end
48
+ end
49
+ end end
@@ -1,14 +1,21 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/filters/base"
3
3
  require "logstash/namespace"
4
- require "json"
5
- require "csv"
6
-
7
- java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
8
-
9
-
4
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
5
+ require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter'
6
+ require 'logstash/plugin_mixins/deprecation_logger_support'
7
+
8
+ require "logstash/filters/dictionary/memory"
9
+ require "logstash/filters/dictionary/file"
10
+ require "logstash/filters/dictionary/csv_file"
11
+ require "logstash/filters/dictionary/yaml_file"
12
+ require "logstash/filters/dictionary/json_file"
13
+
14
+ require_relative "single_value_update"
15
+ require_relative "array_of_values_update"
16
+ require_relative "array_of_maps_value_update"
10
17
  # A general search and replace tool that uses a configured hash
11
- # and/or a file to determine replacement values. Currently supported are
18
+ # and/or a file to determine replacement values. Currently supported are
12
19
  # YAML, JSON, and CSV files.
13
20
  #
14
21
  # The dictionary entries can be specified in one of two ways: First,
@@ -22,27 +29,38 @@ java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
22
29
  # `regex` configuration item has been enabled), the field's value will be substituted
23
30
  # with the matched key's value from the dictionary.
24
31
  #
25
- # By default, the translate filter will replace the contents of the
32
+ # By default, the translate filter will replace the contents of the
26
33
  # matching event field (in-place). However, by using the `destination`
27
34
  # configuration item, you may also specify a target event field to
28
35
  # populate with the new translated value.
29
- #
36
+ #
30
37
  # Alternatively, for simple string search and replacements for just a few values
31
38
  # you might consider using the gsub function of the mutate filter.
39
+ module LogStash module Filters
40
+ class Translate < LogStash::Filters::Base
41
+
42
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
43
+ include LogStash::PluginMixins::DeprecationLoggerSupport
44
+
45
+ extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter
32
46
 
33
- class LogStash::Filters::Translate < LogStash::Filters::Base
34
47
  config_name "translate"
35
48
 
36
49
  # The name of the logstash event field containing the value to be compared for a
37
- # match by the translate filter (e.g. `message`, `host`, `response_code`).
38
- #
39
- # If this field is an array, only the first value will be used.
40
- config :field, :validate => :string, :required => true
50
+ # match by the translate filter (e.g. `message`, `host`, `response_code`).
51
+ #
52
+ # If this field is an array, only the first value will be used, unless
53
+ # you specify `iterate_on`. See below. If you want to use another element
54
+ # in the array then use `"[some_field][2]"`
55
+ config :source, :validate => :field_reference # effectively :required => true
56
+ # due to compatibility with `field => ...` (non-ECS mode) we cannot mark it as required
57
+
58
+ config :field, :validate => :string, :deprecated => "Use `source` option instead."
41
59
 
42
60
  # If the destination (or target) field already exists, this configuration item specifies
43
61
  # whether the filter should skip translation (default) or overwrite the target field
44
62
  # value with the new translation value.
45
- config :override, :validate => :boolean, :default => false
63
+ config :override, :validate => :boolean # :default => false unless field == target
46
64
 
47
65
  # The dictionary to use for translation, when specified in the logstash filter
48
66
  # configuration item (i.e. do not use the `@dictionary_path` file).
@@ -63,8 +81,8 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
63
81
 
64
82
  # The full path of the external dictionary file. The format of the table should
65
83
  # be a standard YAML, JSON or CSV with filenames ending in `.yaml`, `.yml`,
66
- #`.json` or `.csv` to be read. Make sure you specify any integer-based keys in
67
- # quotes. For example, the YAML file (`.yaml` or `.yml` should look something like
84
+ #`.json` or `.csv` to be read. Make sure you specify any integer-based keys in
85
+ # quotes. For example, the YAML file (`.yaml` or `.yml`) should look something like
68
86
  # this:
69
87
  # [source,ruby]
70
88
  # "100": Continue
@@ -85,11 +103,13 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
85
103
  # (in seconds) logstash will check the dictionary file for updates.
86
104
  config :refresh_interval, :validate => :number, :default => 300
87
105
 
88
- # The destination field you wish to populate with the translated code. The default
89
- # is a field named `translation`. Set this to the same value as source if you want
90
- # to do a substitution, in this case filter will allways succeed. This will clobber
91
- # the old value of the source field!
92
- config :destination, :validate => :string, :default => "translation"
106
+ # The target field you wish to populate with the translation.
107
+ # When ECS Compatibility is enabled, the default is an in-place translation that
108
+ # will replace the value of the source field.
109
+ # When ECS Compatibility is disabled, this option falls through to the deprecated `destination` field.
110
+ config :target, :validate => :field_reference
111
+
112
+ config :destination, :validate => :string, :deprecated => "Use `target` option instead." # :default => "translation" (legacy)
93
113
 
94
114
  # When `exact => true`, the translate filter will populate the destination field
95
115
  # with the exact contents of the dictionary value. When `exact => false`, the
@@ -107,11 +127,11 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
107
127
  # will be also set to `bar`. However, if logstash receives an event with the `data` field
108
128
  # set to `foofing`, the destination field will be set to `barfing`.
109
129
  #
110
- # Set both `exact => true` AND `regex => `true` if you would like to match using dictionary
111
- # keys as regular expressions. A large dictionary could be expensive to match in this case.
130
+ # Set both `exact => true` AND `regex => true` if you would like to match using dictionary
131
+ # keys as regular expressions. A large dictionary could be expensive to match in this case.
112
132
  config :exact, :validate => :boolean, :default => true
113
133
 
114
- # If you'd like to treat dictionary keys as regular expressions, set `exact => true`.
134
+ # If you'd like to treat dictionary keys as regular expressions, set `regex => true`.
115
135
  # Note: this is activated only when `exact => true`.
116
136
  config :regex, :validate => :boolean, :default => false
117
137
 
@@ -133,11 +153,21 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
133
153
  # deletes old entries on update.
134
154
  config :refresh_behaviour, :validate => ['merge', 'replace'], :default => 'merge'
135
155
 
156
+ # When the value that you need to perform enrichment on is a variable sized array then specify
157
+ # the field name in this setting. This setting introduces two modes, 1) when the value is an
158
+ # array of strings and 2) when the value is an array of objects (as in JSON object).
159
+ # In the first mode, you should have the same field name in both `field` and `iterate_on`, the
160
+ # result will be an array added to the field specified in the `target` setting. This array
161
+ # will have the looked up value (or the `fallback` value or nil) in same ordinal position
162
+ # as each sought value. In the second mode, specify the field that has the array of objects
163
+ # then specify the field in each object that provides the sought value with `source` and
164
+ # the field to write the looked up value (or the `fallback` value) to with `target`.
165
+ config :iterate_on, :validate => :string
166
+
167
+ attr_reader :lookup # for testing reloading
168
+ attr_reader :updater # for tests
169
+
136
170
  def register
137
- rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
138
- @read_lock = rw_lock.readLock
139
- @write_lock = rw_lock.writeLock
140
-
141
171
  if @dictionary_path && !@dictionary.empty?
142
172
  raise LogStash::ConfigurationError, I18n.t(
143
173
  "logstash.agent.configuration.invalid_plugin_register",
@@ -148,153 +178,69 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
148
178
  end
149
179
 
150
180
  if @dictionary_path
151
- @next_refresh = Time.now + @refresh_interval
152
- raise_exception = true
153
- lock_for_write { load_dictionary(raise_exception) }
154
- end
155
-
156
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary - ", :dictionary => @dictionary)
157
- if @exact
158
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary translation method - Exact")
181
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
159
182
  else
160
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
183
+ @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
161
184
  end
162
- end # def register
163
185
 
164
- def lock_for_read
165
- @read_lock.lock
166
- begin
167
- yield
168
- ensure
169
- @read_lock.unlock
170
- end
171
- end
172
-
173
- def lock_for_write
174
- @write_lock.lock
175
- begin
176
- yield
177
- ensure
178
- @write_lock.unlock
179
- end
180
- end
181
-
182
- def filter(event)
183
- if @dictionary_path
184
- if needs_refresh?
185
- lock_for_write do
186
- if needs_refresh?
187
- load_dictionary
188
- @next_refresh = Time.now + @refresh_interval
189
- @logger.info("refreshing dictionary file")
190
- end
191
- end
186
+ if @field
187
+ if @source
188
+ raise LogStash::ConfigurationError, "Please remove `field => #{@field.inspect}` and only set the `source => ...` option instead"
189
+ else
190
+ deprecation_logger.deprecated("`field` option is deprecated; use `source` instead.")
191
+ logger.debug("intercepting `field` to populate `source`: `#{@field}`")
192
+ @source = @field
192
193
  end
193
194
  end
195
+ unless @source
196
+ raise LogStash::ConfigurationError, "No source field specified, please provide the `source => ...` option"
197
+ end
194
198
 
195
- return unless event.include?(@field) # Skip translation in case event does not have @event field.
196
- return if event.include?(@destination) and not @override # Skip translation in case @destination field already exists and @override is disabled.
197
-
198
- begin
199
- #If source field is array use first value and make sure source value is string
200
- source = event.get(@field).is_a?(Array) ? event.get(@field).first.to_s : event.get(@field).to_s
201
- matched = false
202
- if @exact
203
- if @regex
204
- key = @dictionary.keys.detect{|k| source.match(Regexp.new(k))}
205
- if key
206
- event.set(@destination, lock_for_read { @dictionary[key] })
207
- matched = true
208
- end
209
- elsif @dictionary.include?(source)
210
- event.set(@destination, lock_for_read { @dictionary[source] })
211
- matched = true
212
- end
199
+ if @destination
200
+ if @target
201
+ raise LogStash::ConfigurationError, "Please remove `destination => #{@destination.inspect}` and only set the `target => ...` option instead"
213
202
  else
214
- translation = lock_for_read { source.gsub(Regexp.union(@dictionary.keys), @dictionary) }
215
-
216
- if source != translation
217
- event.set(@destination, translation.force_encoding(Encoding::UTF_8))
218
- matched = true
219
- end
203
+ deprecation_logger.deprecated("`destination` option is deprecated; use `target` instead.")
204
+ logger.debug("intercepting `destination` to populate `target`: `#{@destination}`")
205
+ @target = @destination
220
206
  end
207
+ end
208
+ @target ||= ecs_select[disabled: 'translation', v1: @source]
221
209
 
222
- if not matched and @fallback
223
- event.set(@destination, event.sprintf(@fallback))
224
- matched = true
210
+ if @source == @target
211
+ @override = true if @override.nil?
212
+ if @override.eql?(false)
213
+ raise LogStash::ConfigurationError, "Configuring `override => false` with in-place translation has no effect, please remove the option"
225
214
  end
226
- filter_matched(event) if matched or @field == @destination
227
- rescue Exception => e
228
- @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :field => @field, :event => event)
229
215
  end
230
- end # def filter
231
216
 
232
- private
233
-
234
- def load_dictionary(raise_exception=false)
235
- if /.y[a]?ml$/.match(@dictionary_path)
236
- load_yaml(raise_exception)
237
- elsif @dictionary_path.end_with?(".json")
238
- load_json(raise_exception)
239
- elsif @dictionary_path.end_with?(".csv")
240
- load_csv(raise_exception)
217
+ if @iterate_on.nil?
218
+ @updater = SingleValueUpdate.new(@source, @target, @fallback, @lookup)
219
+ elsif @iterate_on == @source
220
+ @updater = ArrayOfValuesUpdate.new(@iterate_on, @target, @fallback, @lookup)
241
221
  else
242
- raise "#{self.class.name}: Dictionary #{@dictionary_path} have a non valid format"
243
- end
244
- rescue => e
245
- loading_exception(e, raise_exception)
246
- end
247
-
248
- def load_yaml(raise_exception=false)
249
- if !File.exists?(@dictionary_path)
250
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
251
- return
252
- end
253
- refresh_dictionary!(YAML.load_file(@dictionary_path))
254
- end
255
-
256
- def load_json(raise_exception=false)
257
- if !File.exists?(@dictionary_path)
258
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
259
- return
260
- end
261
- refresh_dictionary!(JSON.parse(File.read(@dictionary_path)))
262
- end
263
-
264
- def load_csv(raise_exception=false)
265
- if !File.exists?(@dictionary_path)
266
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
267
- return
268
- end
269
- data = CSV.read(@dictionary_path).inject(Hash.new) do |acc, v|
270
- acc[v[0]] = v[1]
271
- acc
222
+ @updater = ArrayOfMapsValueUpdate.new(@iterate_on, @source, @target, @fallback, @lookup)
272
223
  end
273
- refresh_dictionary!(data)
274
- end
275
224
 
276
- def refresh_dictionary!(data)
277
- case @refresh_behaviour
278
- when 'merge'
279
- @dictionary.merge!(data)
280
- when 'replace'
281
- @dictionary = data
225
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary - ", :dictionary => @lookup.dictionary)
226
+ if @exact
227
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Exact")
282
228
  else
283
- # we really should never get here
284
- raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{@refresh_behaviour.to_s}")
229
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
285
230
  end
286
- end
231
+ end # def register
287
232
 
288
- def loading_exception(e, raise_exception=false)
289
- msg = "#{self.class.name}: #{e.message} when loading dictionary file at #{@dictionary_path}"
290
- if raise_exception
291
- raise RuntimeError.new(msg)
292
- else
293
- @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
294
- end
233
+ def close
234
+ @lookup.stop_scheduler
295
235
  end
296
236
 
297
- def needs_refresh?
298
- @next_refresh < Time.now
299
- end
237
+ def filter(event)
238
+ return unless @updater.test_for_inclusion(event, @override)
239
+ begin
240
+ filter_matched(event) if @updater.update(event) || @source == @target
241
+ rescue => e
242
+ @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
243
+ end
244
+ end # def filter
300
245
  end # class LogStash::Filters::Translate
246
+ end end