logstash-filter-translate 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters module FetchStrategy module File
4
# Exact-key lookup strategy for a dictionary guarded by a read/write lock.
# Each lookup runs while holding the read lock taken from the supplied lock.
class Exact
  # dictionary - Hash-like object mapping lookup keys to translated values.
  # rw_lock    - object whose `readLock` returns a lock that responds to
  #              `lock` and `unlock`.
  def initialize(dictionary, rw_lock)
    @read_lock = rw_lock.readLock
    @dictionary = dictionary
  end

  # Notification hook for dictionary changes. This strategy keeps no derived
  # state, so there is nothing to rebuild.
  def dictionary_updated; end

  # Looks up `source` as an exact dictionary key.
  #
  # source  - the key to look up.
  # results - array mutated in place: on a hit, index 1 receives a deep clone
  #           of the stored value; on a miss, index 0 is set to false.
  def fetch(source, results)
    @read_lock.lock
    begin
      return results[0] = false unless @dictionary.include?(source)

      results[1] = LogStash::Util.deep_clone(@dictionary[source])
    ensure
      # Release the lock on every exit path, including the early return.
      @read_lock.unlock
    end
  end
end
26
+
27
# Lookup strategy that treats every dictionary key as a regular expression
# and returns the value of the first key whose pattern matches the source.
# Lookups run while holding the read lock taken from the supplied lock.
class ExactRegex
  # dictionary - Hash mapping pattern strings to translated values.
  # rw_lock    - object whose `readLock` returns a lock that responds to
  #              `lock` and `unlock`.
  def initialize(dictionary, rw_lock)
    @keys_regex = {}
    @dictionary = dictionary
    @read_lock = rw_lock.readLock
    # Compile whatever the dictionary already holds so that `fetch` is usable
    # even before the first `dictionary_updated` notification.
    dictionary_updated
  end

  # Recompiles the key => Regexp map from the current dictionary keys.
  def dictionary_updated
    # rebuilding the regex map is time expensive
    # 100 000 keys takes 0.5 seconds on a high spec Macbook Pro
    # at least we are not doing it for every event like before
    #
    # Build into a fresh Hash and swap the reference afterwards: if compiling
    # a key raises, the previous map stays intact, and `fetch` never iterates
    # a partially rebuilt map.
    @keys_regex = @dictionary.keys.each_with_object({}) do |key, compiled|
      compiled[key] = Regexp.new(key)
    end
  end

  # Finds the first key (in map order) whose compiled pattern matches `source`.
  #
  # source  - String to match against the compiled key patterns.
  # results - array mutated in place: on a hit, index 1 receives a deep clone
  #           of the matched key's value; on a miss, index 0 is set to false.
  def fetch(source, results)
    @read_lock.lock
    begin
      # Iterate the precompiled pairs directly instead of allocating
      # `@dictionary.keys` and re-looking up each regex for every event.
      key, _regex = @keys_regex.detect { |_key, regex| source.match(regex) }
      if key.nil?
        results[0] = false
      else
        results[1] = LogStash::Util.deep_clone(@dictionary[key])
      end
    ensure
      @read_lock.unlock
    end
  end
end
56
+
57
# Substitution strategy: every occurrence of any dictionary key inside the
# source string is replaced by that key's dictionary value.
# Lookups run while holding the read lock taken from the supplied lock.
class RegexUnion
  # dictionary - Hash mapping literal key strings to replacement values.
  # rw_lock    - object whose `readLock` returns a lock that responds to
  #              `lock` and `unlock`.
  def initialize(dictionary, rw_lock)
    @dictionary = dictionary
    @read_lock = rw_lock.readLock
    # Build the union up front; otherwise a `fetch` issued before the first
    # `dictionary_updated` would call `gsub(nil, ...)` and raise TypeError.
    dictionary_updated
  end

  # Rebuilds the union pattern from the current dictionary keys.
  def dictionary_updated
    @union_regex_keys = Regexp.union(@dictionary.keys)
  end

  # Replaces all key occurrences in `source` using the dictionary.
  #
  # source  - String to run the substitution on.
  # results - array mutated in place: when the substitution changed the
  #           string, index 1 receives a deep clone of the new string;
  #           otherwise index 0 is set to false.
  def fetch(source, results)
    @read_lock.lock
    begin
      value = source.gsub(@union_regex_keys, @dictionary)
      if source == value
        results[0] = false
      else
        results[1] = LogStash::Util.deep_clone(value)
      end
    ensure
      @read_lock.unlock
    end
  end
end
81
+ end end end end
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters module FetchStrategy module Memory
4
# Exact-key lookup strategy for an in-memory dictionary (no locking).
class Exact
  # dictionary - Hash-like object mapping lookup keys to translated values.
  def initialize(dictionary)
    @dictionary = dictionary
  end

  # Looks up `source` as an exact dictionary key.
  #
  # source  - the key to look up.
  # results - array mutated in place: on a hit, index 1 receives a deep clone
  #           of the stored value; on a miss, index 0 is set to false.
  def fetch(source, results)
    return results[0] = false unless @dictionary.include?(source)

    results[1] = LogStash::Util.deep_clone(@dictionary[source])
  end
end
17
+
18
# Lookup strategy for an in-memory dictionary that treats every key as a
# regular expression and returns the value of the first matching key.
class ExactRegex
  # dictionary - Hash mapping pattern strings to translated values. The
  #              patterns are compiled once, here.
  def initialize(dictionary)
    @dictionary = dictionary
    @keys_regex = dictionary.keys.each_with_object({}) do |key, compiled|
      compiled[key] = Regexp.new(key)
    end
  end

  # Finds the first key (in dictionary order at construction time) whose
  # compiled pattern matches `source`.
  #
  # source  - String to match against the compiled key patterns.
  # results - array mutated in place: on a hit, index 1 receives a deep clone
  #           of the matched key's value; on a miss, index 0 is set to false.
  def fetch(source, results)
    # Walk the precompiled pairs directly; this avoids building a fresh
    # `keys` array and doing a second hash lookup per key on every event.
    key, _regex = @keys_regex.detect { |_key, regex| source.match(regex) }
    if key.nil?
      results[0] = false
    else
      results[1] = LogStash::Util.deep_clone(@dictionary[key])
    end
  end
end
34
+
35
# Substitution strategy for an in-memory dictionary: every occurrence of any
# dictionary key inside the source string is replaced by that key's value.
class RegexUnion
  # dictionary - Hash mapping literal key strings to replacement values. The
  #              union pattern over its keys is built once, here.
  def initialize(dictionary)
    @dictionary = dictionary
    @union_regex_keys = Regexp.union(@dictionary.keys)
  end

  # Replaces all key occurrences in `source` using the dictionary.
  #
  # source  - String to run the substitution on.
  # results - array mutated in place: when the substitution changed the
  #           string, index 1 receives a deep clone of the new string;
  #           otherwise index 0 is set to false.
  def fetch(source, results)
    replaced = source.gsub(@union_regex_keys, @dictionary)
    return results[0] = false if source == replaced

    results[1] = LogStash::Util.deep_clone(replaced)
  end
end
50
+ end end end end
51
+
52
+
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ module LogStash module Filters
4
# Translates a single event field: reads the source field, coerces it to a
# string key, asks the lookup's fetch strategy for a translation and writes the
# outcome (or the configured fallback) to the destination field.
class SingleValueUpdate
  # Coercer for String sources: the value is used as-is.
  class CoerceString
    def call(source)
      source
    end
  end

  # Coercer for Array sources: only the first element is used, as a string.
  class CoerceArray
    def call(source)
      source.first.to_s
    end
  end

  # Coercer for every other class: the value's string form is used.
  class CoerceOther
    def call(source)
      source.to_s
    end
  end

  # field       - name of the event field holding the value to translate.
  # destination - name of the event field that receives the translation.
  # fallback    - value written when nothing matches; nil disables it.
  # lookup      - object whose `fetch_strategy` responds to `fetch(source, results)`.
  def initialize(field, destination, fallback, lookup)
    @field = field
    @destination = destination
    @lookup = lookup
    @fallback = fallback
    # A non-nil fallback means the user set a value in the config.
    @use_fallback = !fallback.nil?
    # Coercer dispatch keyed on the source value's exact class.
    @coercers_table = { String => CoerceString.new, Array => CoerceArray.new }
    @coercers_table.default = CoerceOther.new
  end

  # Decides whether the event should be translated at all. Translation is
  # skipped when the destination already exists and `override` is disabled;
  # otherwise the answer is whether the event has the source field.
  def test_for_inclusion(event, override)
    (override || !event.include?(@destination)) && event.include?(@field)
  end

  # Performs the translation on `event`.
  #
  # Returns the strategy's hit flag when a translation was found, true when
  # the fallback was written, and the (falsy) hit flag otherwise.
  def update(event)
    raw = event.get(@field)
    key = @coercers_table[raw.class].call(raw)

    # Slot 0 is the hit flag (the strategy sets it to false on a miss),
    # the last slot carries the translated value.
    outcome = [true, nil]
    @lookup.fetch_strategy.fetch(key, outcome)

    hit = outcome.first
    if hit
      event.set(@destination, outcome.last)
      return hit
    end
    return hit unless @use_fallback

    event.set(@destination, event.sprintf(@fallback))
    true
  end
end
49
+ end end
@@ -1,14 +1,21 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/filters/base"
3
3
  require "logstash/namespace"
4
- require "json"
5
- require "csv"
6
-
7
- java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
8
-
9
-
4
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
5
+ require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter'
6
+ require 'logstash/plugin_mixins/deprecation_logger_support'
7
+
8
+ require "logstash/filters/dictionary/memory"
9
+ require "logstash/filters/dictionary/file"
10
+ require "logstash/filters/dictionary/csv_file"
11
+ require "logstash/filters/dictionary/yaml_file"
12
+ require "logstash/filters/dictionary/json_file"
13
+
14
+ require_relative "single_value_update"
15
+ require_relative "array_of_values_update"
16
+ require_relative "array_of_maps_value_update"
10
17
  # A general search and replace tool that uses a configured hash
11
- # and/or a file to determine replacement values. Currently supported are
18
+ # and/or a file to determine replacement values. Currently supported are
12
19
  # YAML, JSON, and CSV files.
13
20
  #
14
21
  # The dictionary entries can be specified in one of two ways: First,
@@ -22,27 +29,38 @@ java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
22
29
  # `regex` configuration item has been enabled), the field's value will be substituted
23
30
  # with the matched key's value from the dictionary.
24
31
  #
25
- # By default, the translate filter will replace the contents of the
32
+ # By default, the translate filter will replace the contents of the
26
33
  # matching event field (in-place). However, by using the `destination`
27
34
  # configuration item, you may also specify a target event field to
28
35
  # populate with the new translated value.
29
- #
36
+ #
30
37
  # Alternatively, for simple string search and replacements for just a few values
31
38
  # you might consider using the gsub function of the mutate filter.
39
+ module LogStash module Filters
40
+ class Translate < LogStash::Filters::Base
41
+
42
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
43
+ include LogStash::PluginMixins::DeprecationLoggerSupport
44
+
45
+ extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter
32
46
 
33
- class LogStash::Filters::Translate < LogStash::Filters::Base
34
47
  config_name "translate"
35
48
 
36
49
  # The name of the logstash event field containing the value to be compared for a
37
- # match by the translate filter (e.g. `message`, `host`, `response_code`).
38
- #
39
- # If this field is an array, only the first value will be used.
40
- config :field, :validate => :string, :required => true
50
+ # match by the translate filter (e.g. `message`, `host`, `response_code`).
51
+ #
52
+ # If this field is an array, only the first value will be used, unless
53
+ # you specify `iterate_on`. See below. If you want to use another element
54
+ # in the array then use `"[some_field][2]"`
55
+ config :source, :validate => :field_reference # effectively :required => true
56
+ # due to compatibility with `field => ...` (non-ECS mode) we cannot mark it as required
57
+
58
+ config :field, :validate => :string, :deprecated => "Use `source` option instead."
41
59
 
42
60
  # If the destination (or target) field already exists, this configuration item specifies
43
61
  # whether the filter should skip translation (default) or overwrite the target field
44
62
  # value with the new translation value.
45
- config :override, :validate => :boolean, :default => false
63
+ config :override, :validate => :boolean # :default => false unless field == target
46
64
 
47
65
  # The dictionary to use for translation, when specified in the logstash filter
48
66
  # configuration item (i.e. do not use the `@dictionary_path` file).
@@ -63,8 +81,8 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
63
81
 
64
82
  # The full path of the external dictionary file. The format of the table should
65
83
  # be a standard YAML, JSON or CSV with filenames ending in `.yaml`, `.yml`,
66
- #`.json` or `.csv` to be read. Make sure you specify any integer-based keys in
67
- # quotes. For example, the YAML file (`.yaml` or `.yml` should look something like
84
+ # `.json` or `.csv` to be read. Make sure you specify any integer-based keys in
85
+ # quotes. For example, the YAML file (`.yaml` or `.yml`) should look something like
68
86
  # this:
69
87
  # [source,ruby]
70
88
  # "100": Continue
@@ -85,11 +103,13 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
85
103
  # (in seconds) logstash will check the dictionary file for updates.
86
104
  config :refresh_interval, :validate => :number, :default => 300
87
105
 
88
- # The destination field you wish to populate with the translated code. The default
89
- # is a field named `translation`. Set this to the same value as source if you want
90
- # to do a substitution, in this case filter will allways succeed. This will clobber
91
- # the old value of the source field!
92
- config :destination, :validate => :string, :default => "translation"
106
+ # The target field you wish to populate with the translation.
107
+ # When ECS Compatibility is enabled, the default is an in-place translation that
108
+ # will replace the value of the source field.
109
+ # When ECS Compatibility is disabled, this option falls through to the deprecated `destination` field.
110
+ config :target, :validate => :field_reference
111
+
112
+ config :destination, :validate => :string, :deprecated => "Use `target` option instead." # :default => "translation" (legacy)
93
113
 
94
114
  # When `exact => true`, the translate filter will populate the destination field
95
115
  # with the exact contents of the dictionary value. When `exact => false`, the
@@ -107,11 +127,11 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
107
127
  # will be also set to `bar`. However, if logstash receives an event with the `data` field
108
128
  # set to `foofing`, the destination field will be set to `barfing`.
109
129
  #
110
- # Set both `exact => true` AND `regex => `true` if you would like to match using dictionary
111
- # keys as regular expressions. A large dictionary could be expensive to match in this case.
130
+ # Set both `exact => true` AND `regex => true` if you would like to match using dictionary
131
+ # keys as regular expressions. A large dictionary could be expensive to match in this case.
112
132
  config :exact, :validate => :boolean, :default => true
113
133
 
114
- # If you'd like to treat dictionary keys as regular expressions, set `exact => true`.
134
+ # If you'd like to treat dictionary keys as regular expressions, set `regex => true`.
115
135
  # Note: this is activated only when `exact => true`.
116
136
  config :regex, :validate => :boolean, :default => false
117
137
 
@@ -133,11 +153,21 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
133
153
  # deletes old entries on update.
134
154
  config :refresh_behaviour, :validate => ['merge', 'replace'], :default => 'merge'
135
155
 
156
+ # When the value that you need to perform enrichment on is a variable sized array then specify
157
+ # the field name in this setting. This setting introduces two modes, 1) when the value is an
158
+ # array of strings and 2) when the value is an array of objects (as in JSON object).
159
+ # In the first mode, you should have the same field name in both `field` and `iterate_on`, the
160
+ # result will be an array added to the field specified in the `destination` setting. This array
161
+ # will have the looked up value (or the `fallback` value or nil) in same ordinal position
162
+ # as each sought value. In the second mode, specify the field that has the array of objects
163
+ # then specify the field in each object that provides the sought value with `field` and
164
+ # the field to write the looked up value (or the `fallback` value) to with `destination`
165
+ config :iterate_on, :validate => :string
166
+
167
+ attr_reader :lookup # for testing reloading
168
+ attr_reader :updater # for tests
169
+
136
170
  def register
137
- rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
138
- @read_lock = rw_lock.readLock
139
- @write_lock = rw_lock.writeLock
140
-
141
171
  if @dictionary_path && !@dictionary.empty?
142
172
  raise LogStash::ConfigurationError, I18n.t(
143
173
  "logstash.agent.configuration.invalid_plugin_register",
@@ -148,153 +178,69 @@ class LogStash::Filters::Translate < LogStash::Filters::Base
148
178
  end
149
179
 
150
180
  if @dictionary_path
151
- @next_refresh = Time.now + @refresh_interval
152
- raise_exception = true
153
- lock_for_write { load_dictionary(raise_exception) }
154
- end
155
-
156
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary - ", :dictionary => @dictionary)
157
- if @exact
158
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary translation method - Exact")
181
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
159
182
  else
160
- @logger.debug? and @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
183
+ @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
161
184
  end
162
- end # def register
163
185
 
164
- def lock_for_read
165
- @read_lock.lock
166
- begin
167
- yield
168
- ensure
169
- @read_lock.unlock
170
- end
171
- end
172
-
173
- def lock_for_write
174
- @write_lock.lock
175
- begin
176
- yield
177
- ensure
178
- @write_lock.unlock
179
- end
180
- end
181
-
182
- def filter(event)
183
- if @dictionary_path
184
- if needs_refresh?
185
- lock_for_write do
186
- if needs_refresh?
187
- load_dictionary
188
- @next_refresh = Time.now + @refresh_interval
189
- @logger.info("refreshing dictionary file")
190
- end
191
- end
186
+ if @field
187
+ if @source
188
+ raise LogStash::ConfigurationError, "Please remove `field => #{@field.inspect}` and only set the `source => ...` option instead"
189
+ else
190
+ deprecation_logger.deprecated("`field` option is deprecated; use `source` instead.")
191
+ logger.debug("intercepting `field` to populate `source`: `#{@field}`")
192
+ @source = @field
192
193
  end
193
194
  end
195
+ unless @source
196
+ raise LogStash::ConfigurationError, "No source field specified, please provide the `source => ...` option"
197
+ end
194
198
 
195
- return unless event.include?(@field) # Skip translation in case event does not have @event field.
196
- return if event.include?(@destination) and not @override # Skip translation in case @destination field already exists and @override is disabled.
197
-
198
- begin
199
- #If source field is array use first value and make sure source value is string
200
- source = event.get(@field).is_a?(Array) ? event.get(@field).first.to_s : event.get(@field).to_s
201
- matched = false
202
- if @exact
203
- if @regex
204
- key = @dictionary.keys.detect{|k| source.match(Regexp.new(k))}
205
- if key
206
- event.set(@destination, lock_for_read { @dictionary[key] })
207
- matched = true
208
- end
209
- elsif @dictionary.include?(source)
210
- event.set(@destination, lock_for_read { @dictionary[source] })
211
- matched = true
212
- end
199
+ if @destination
200
+ if @target
201
+ raise LogStash::ConfigurationError, "Please remove `destination => #{@destination.inspect}` and only set the `target => ...` option instead"
213
202
  else
214
- translation = lock_for_read { source.gsub(Regexp.union(@dictionary.keys), @dictionary) }
215
-
216
- if source != translation
217
- event.set(@destination, translation.force_encoding(Encoding::UTF_8))
218
- matched = true
219
- end
203
+ deprecation_logger.deprecated("`destination` option is deprecated; use `target` instead.")
204
+ logger.debug("intercepting `destination` to populate `target`: `#{@destination}`")
205
+ @target = @destination
220
206
  end
207
+ end
208
+ @target ||= ecs_select[disabled: 'translation', v1: @source]
221
209
 
222
- if not matched and @fallback
223
- event.set(@destination, event.sprintf(@fallback))
224
- matched = true
210
+ if @source == @target
211
+ @override = true if @override.nil?
212
+ if @override.eql?(false)
213
+ raise LogStash::ConfigurationError, "Configuring `override => false` with in-place translation has no effect, please remove the option"
225
214
  end
226
- filter_matched(event) if matched or @field == @destination
227
- rescue Exception => e
228
- @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :field => @field, :event => event)
229
215
  end
230
- end # def filter
231
216
 
232
- private
233
-
234
- def load_dictionary(raise_exception=false)
235
- if /.y[a]?ml$/.match(@dictionary_path)
236
- load_yaml(raise_exception)
237
- elsif @dictionary_path.end_with?(".json")
238
- load_json(raise_exception)
239
- elsif @dictionary_path.end_with?(".csv")
240
- load_csv(raise_exception)
217
+ if @iterate_on.nil?
218
+ @updater = SingleValueUpdate.new(@source, @target, @fallback, @lookup)
219
+ elsif @iterate_on == @source
220
+ @updater = ArrayOfValuesUpdate.new(@iterate_on, @target, @fallback, @lookup)
241
221
  else
242
- raise "#{self.class.name}: Dictionary #{@dictionary_path} have a non valid format"
243
- end
244
- rescue => e
245
- loading_exception(e, raise_exception)
246
- end
247
-
248
- def load_yaml(raise_exception=false)
249
- if !File.exists?(@dictionary_path)
250
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
251
- return
252
- end
253
- refresh_dictionary!(YAML.load_file(@dictionary_path))
254
- end
255
-
256
- def load_json(raise_exception=false)
257
- if !File.exists?(@dictionary_path)
258
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
259
- return
260
- end
261
- refresh_dictionary!(JSON.parse(File.read(@dictionary_path)))
262
- end
263
-
264
- def load_csv(raise_exception=false)
265
- if !File.exists?(@dictionary_path)
266
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
267
- return
268
- end
269
- data = CSV.read(@dictionary_path).inject(Hash.new) do |acc, v|
270
- acc[v[0]] = v[1]
271
- acc
222
+ @updater = ArrayOfMapsValueUpdate.new(@iterate_on, @source, @target, @fallback, @lookup)
272
223
  end
273
- refresh_dictionary!(data)
274
- end
275
224
 
276
- def refresh_dictionary!(data)
277
- case @refresh_behaviour
278
- when 'merge'
279
- @dictionary.merge!(data)
280
- when 'replace'
281
- @dictionary = data
225
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary - ", :dictionary => @lookup.dictionary)
226
+ if @exact
227
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Exact")
282
228
  else
283
- # we really should never get here
284
- raise(LogStash::ConfigurationError, "Unknown value for refresh_behaviour=#{@refresh_behaviour.to_s}")
229
+ @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
285
230
  end
286
- end
231
+ end # def register
287
232
 
288
- def loading_exception(e, raise_exception=false)
289
- msg = "#{self.class.name}: #{e.message} when loading dictionary file at #{@dictionary_path}"
290
- if raise_exception
291
- raise RuntimeError.new(msg)
292
- else
293
- @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
294
- end
233
+ def close
234
+ @lookup.stop_scheduler
295
235
  end
296
236
 
297
- def needs_refresh?
298
- @next_refresh < Time.now
299
- end
237
# Translates one event. Nothing happens unless the updater says the event
# qualifies; `filter_matched` is invoked when the updater reports a
# translation or when translating in place (@source == @target).
# Errors raised while translating are logged rather than propagated.
def filter(event)
  if @updater.test_for_inclusion(event, @override)
    begin
      translated = @updater.update(event)
      filter_matched(event) if translated || @source == @target
    rescue => error
      @logger.error("Something went wrong when attempting to translate from dictionary", :exception => error, :source => @source, :event => event.to_hash)
    end
  end
end # def filter
300
245
  end # class LogStash::Filters::Translate
246
+ end end