logstash-filter-translate 3.3.1 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 77bd945dee5e20dbc7eb6837c655e807373ee8923dcbd498c9fcf5b1065c6661
4
- data.tar.gz: 878df6d8dfcc5fa53bb617163b0ddfe6feeb8e876aed9668b337f66dc8472221
3
+ metadata.gz: 2159e15fd76b237d148128893d1ee1de04787628a72f31a889021220d24ecd68
4
+ data.tar.gz: '05348931cfb26f1f6968ebe703c43c9d4b0f744deba529c66bc4b8887805b79d'
5
5
  SHA512:
6
- metadata.gz: d45ec30865d73884f9564f3680fdc1fb230c9fa71507c86d475e6ec0759d13863080f8c237ab34b38ccad08bca0781ab1d8a8523955841db2d0a0334091a9cf3
7
- data.tar.gz: 443284f8db270ec6081a2017bd6597733be886643232d97a7630c99e1b194970183e0a7d070ed511bdb1d8069e7f929a991d608798a35d8bd5408878806945d5
6
+ metadata.gz: e0c5b356db73ea5815fad77e8335c4788857cf3bd9f79688152d0152fbf4e9cfd76297f0d0289a407f502a7475adf389398e2cbc9c69166337dc90dab0db1fc8
7
+ data.tar.gz: c66b52755ff2913c3e840ae08476636c07cd5cb90cebaaf9d8d128f4f0e132cb0b4b332730c2522ff79b1c9f76f2181f12063c54c8bac1adb7afa16c850ae82f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 3.4.1
2
+ - Fix: lift the 3MB size limit on YAML dictionary files; the limit is now configurable via `yaml_dictionary_code_point_limit` [#97](https://github.com/logstash-plugins/logstash-filter-translate/pull/97)
3
+
4
+ ## 3.4.0
5
+ - Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)
6
+
1
7
  ## 3.3.1
2
8
  - Refactor: reading .csv for JRuby 9.3 compatibility [#94](https://github.com/logstash-plugins/logstash-filter-translate/pull/94)
3
9
 
data/docs/index.asciidoc CHANGED
@@ -108,6 +108,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
108
108
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
109
109
  | <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
110
110
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
111
+ | <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
111
112
  |=======================================================================
112
113
 
113
114
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -421,5 +422,15 @@ The target field you wish to populate with the translated code.
421
422
  If you set this value to the same value as `source` field, the plugin does a substitution, and
422
423
  the filter will succeed. This will clobber the old value of the source field!
423
424
 
425
+
426
+ [id="plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit"]
427
+ ===== `yaml_dictionary_code_point_limit`
428
+
429
+ * Value type is <<number,number>>
430
+ * Default value is 134217728 (128MB for 1 byte code points)
431
+
432
+ The maximum number of code points allowed in the YAML file at `dictionary_path`. Be aware that the equivalent byte limit depends on the encoding.
433
+ This setting applies to YAML files only. Loading a YAML file that exceeds the limit raises an exception.
434
+
424
435
  [id="plugins-{type}s-{plugin}-common-options"]
425
436
  include::{include_path}/{type}.asciidoc[]
@@ -1,18 +1,17 @@
1
1
  # encoding: utf-8
2
- require 'concurrent/atomic/atomic_boolean'
3
- require 'rufus-scheduler'
4
2
  require "logstash/util/loggable"
5
3
  require "logstash/filters/fetch_strategy/file"
6
4
 
7
- java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
8
-
9
5
  module LogStash module Filters module Dictionary
10
6
  class DictionaryFileError < StandardError; end
11
7
 
12
8
  class File
13
- def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
9
+
10
+ include LogStash::Util::Loggable
11
+
12
+ def self.create(path, refresh_interval, refresh_behaviour, exact, regex, **file_type_args)
14
13
  if /\.y[a]?ml$/.match(path)
15
- instance = YamlFile.new(path, refresh_interval, exact, regex)
14
+ instance = YamlFile.new(path, refresh_interval, exact, regex, file_type_args)
16
15
  elsif path.end_with?(".json")
17
16
  instance = JsonFile.new(path, refresh_interval, exact, regex)
18
17
  elsif path.end_with?(".csv")
@@ -30,19 +29,17 @@ module LogStash module Filters module Dictionary
30
29
  end
31
30
  end
32
31
 
33
- include LogStash::Util::Loggable
34
32
  attr_reader :dictionary, :fetch_strategy
35
33
 
36
- def initialize(path, refresh_interval, exact, regex)
34
+ def initialize(path, refresh_interval, exact, regex, **file_type_args)
37
35
  @dictionary_path = path
38
36
  @refresh_interval = refresh_interval
39
37
  @short_refresh = @refresh_interval <= 300
40
- @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
41
38
  rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
42
39
  @write_lock = rw_lock.writeLock
43
40
  @dictionary = Hash.new
44
41
  @update_method = method(:merge_dictionary)
45
- initialize_for_file_type
42
+ initialize_for_file_type(file_type_args)
46
43
  args = [@dictionary, rw_lock]
47
44
  klass = case
48
45
  when exact && regex then FetchStrategy::File::ExactRegex
@@ -51,13 +48,6 @@ module LogStash module Filters module Dictionary
51
48
  end
52
49
  @fetch_strategy = klass.new(*args)
53
50
  load_dictionary(raise_exception = true)
54
- stop_scheduler(initial = true)
55
- start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
56
- end
57
-
58
- def stop_scheduler(initial = false)
59
- @stopping.make_true unless initial
60
- @scheduler.shutdown(:wait) if @scheduler
61
51
  end
62
52
 
63
53
  def load_dictionary(raise_exception=false)
@@ -65,7 +55,7 @@ module LogStash module Filters module Dictionary
65
55
  @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
66
56
  @update_method.call
67
57
  rescue Errno::ENOENT
68
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
58
+ logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
69
59
  rescue => e
70
60
  loading_exception(e, raise_exception)
71
61
  end
@@ -78,7 +68,7 @@ module LogStash module Filters module Dictionary
78
68
 
79
69
  protected
80
70
 
81
- def initialize_for_file_type
71
+ def initialize_for_file_type(**file_type_args)
82
72
  # sub class specific initializer
83
73
  end
84
74
 
@@ -88,13 +78,6 @@ module LogStash module Filters module Dictionary
88
78
 
89
79
  private
90
80
 
91
- def start_scheduler
92
- @scheduler = Rufus::Scheduler.new
93
- @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
94
- reload_dictionary
95
- end
96
- end
97
-
98
81
  def merge_dictionary
99
82
  @write_lock.lock
100
83
  begin
@@ -116,14 +99,15 @@ module LogStash module Filters module Dictionary
116
99
  end
117
100
  end
118
101
 
102
+ # scheduler executes this method, periodically
119
103
  def reload_dictionary
120
- return if @stopping.true?
121
104
  if @short_refresh
122
105
  load_dictionary if needs_refresh?
123
106
  else
124
107
  load_dictionary
125
108
  end
126
109
  end
110
+ public :reload_dictionary
127
111
 
128
112
  def needs_refresh?
129
113
  @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
@@ -136,7 +120,7 @@ module LogStash module Filters module Dictionary
136
120
  dfe.set_backtrace(e.backtrace)
137
121
  raise dfe
138
122
  else
139
- @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
123
+ logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
140
124
  end
141
125
  end
142
126
  end
@@ -6,9 +6,6 @@ module LogStash module Filters module Dictionary
6
6
 
7
7
  protected
8
8
 
9
- def initialize_for_file_type
10
- end
11
-
12
9
  def read_file_into_dictionary
13
10
  content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
14
11
  @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
@@ -7,18 +7,20 @@ module LogStash module Filters module Dictionary
7
7
 
8
8
  protected
9
9
 
10
- def initialize_for_file_type
10
+ def initialize_for_file_type(**file_type_args)
11
11
  @visitor = YamlVisitor.create
12
+
13
+ @parser = Psych::Parser.new(Psych::TreeBuilder.new)
14
+ @parser.code_point_limit = file_type_args[:yaml_code_point_limit]
12
15
  end
13
16
 
14
17
  def read_file_into_dictionary
15
18
  # low level YAML read that tries to create as
16
19
  # few intermediate objects as possible
17
20
  # this overwrites the value at key
18
- @visitor.accept_with_dictionary(
19
- @dictionary, Psych.parse_stream(
20
- IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
21
- ))
21
+ yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
22
+ @parser.parse(yaml_string, @dictionary_path)
23
+ @visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
22
24
  end
23
25
  end
24
26
  end end end
@@ -4,6 +4,7 @@ require "logstash/namespace"
4
4
  require 'logstash/plugin_mixins/ecs_compatibility_support'
5
5
  require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter'
6
6
  require 'logstash/plugin_mixins/deprecation_logger_support'
7
+ require 'logstash/plugin_mixins/scheduler'
7
8
 
8
9
  require "logstash/filters/dictionary/memory"
9
10
  require "logstash/filters/dictionary/file"
@@ -44,6 +45,8 @@ class Translate < LogStash::Filters::Base
44
45
 
45
46
  extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter
46
47
 
48
+ include LogStash::PluginMixins::Scheduler
49
+
47
50
  config_name "translate"
48
51
 
49
52
  # The name of the logstash event field containing the value to be compared for a
@@ -99,6 +102,12 @@ class Translate < LogStash::Filters::Base
99
102
  # as the original text, and the second column as the replacement.
100
103
  config :dictionary_path, :validate => :path
101
104
 
105
+ # The maximum number of code points allowed in the YAML file at `dictionary_path`. Be aware that the equivalent byte limit depends on the encoding.
106
+ # SnakeYAML 1.33 has a default limit of 3MB. A YAML file over the limit raises an exception. JSON and CSV currently have no such limit.
107
+ # The limit could be too small in some use cases. Set a larger number in `yaml_dictionary_code_point_limit` to relax the restriction.
108
+ # The default value is 134217728 code points (128MB when every code point is 1 byte)
109
+ config :yaml_dictionary_code_point_limit, :validate => :number
110
+
102
111
  # When using a dictionary file, this setting will indicate how frequently
103
112
  # (in seconds) logstash will check the dictionary file for updates.
104
113
  config :refresh_interval, :validate => :number, :default => 300
@@ -177,8 +186,22 @@ class Translate < LogStash::Filters::Base
177
186
  )
178
187
  end
179
188
 
189
+ # check and set yaml code point limit
190
+ # set lookup dictionary
180
191
  if @dictionary_path
181
- @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
192
+ if yaml_file?(@dictionary_path)
193
+ @yaml_dictionary_code_point_limit ||= 134_217_728
194
+
195
+ if @yaml_dictionary_code_point_limit <= 0
196
+ raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
197
+ else
198
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
199
+ end
200
+ elsif @yaml_dictionary_code_point_limit != nil
201
+ raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
202
+ else
203
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
204
+ end
182
205
  else
183
206
  @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
184
207
  end
@@ -228,11 +251,11 @@ class Translate < LogStash::Filters::Base
228
251
  else
229
252
  @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
230
253
  end
231
- end # def register
232
254
 
233
- def close
234
- @lookup.stop_scheduler
235
- end
255
+ if @lookup.respond_to?(:reload_dictionary) && @refresh_interval > 0 # a scheduler interval of zero makes no sense
256
+ scheduler.interval("#{@refresh_interval}s", overlap: false) { @lookup.reload_dictionary }
257
+ end
258
+ end # def register
236
259
 
237
260
  def filter(event)
238
261
  return unless @updater.test_for_inclusion(event, @override)
@@ -242,5 +265,9 @@ class Translate < LogStash::Filters::Base
242
265
  @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
243
266
  end
244
267
  end # def filter
268
+
269
+ def yaml_file?(path)
270
+ /\.y[a]?ml$/.match(path)
271
+ end
245
272
  end # class LogStash::Filters::Translate
246
273
  end end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-translate'
4
- s.version = '3.3.1'
4
+ s.version = '3.4.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Replaces field contents based on a hash or YAML file"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -24,7 +24,8 @@ Gem::Specification.new do |s|
24
24
  s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.2'
25
25
  s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
26
26
  s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
27
- s.add_runtime_dependency 'rufus-scheduler'
27
+ s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
28
+ s.add_runtime_dependency "psych", ">= 5.1.0"
28
29
 
29
30
  s.add_development_dependency 'logstash-devutils'
30
31
  s.add_development_dependency 'rspec-sequencing'
@@ -57,9 +57,9 @@ describe LogStash::Filters::Translate do
57
57
  end
58
58
  end
59
59
  .then_after(1.2, "wait then translate again") do
60
- subject.filter(event)
61
60
  try(5) do
62
- wait(0.1).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
61
+ subject.filter(event)
62
+ wait(0.5).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
63
63
  end
64
64
  end
65
65
  .then("stop") do
@@ -88,9 +88,9 @@ describe LogStash::Filters::Translate do
88
88
  end
89
89
  end
90
90
  .then_after(1.2, "wait then translate again") do
91
- subject.filter(event)
92
91
  try(5) do
93
- wait(0.1).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
92
+ subject.filter(event)
93
+ wait(0.5).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
94
94
  end
95
95
  end
96
96
  .then("stop") do
@@ -240,6 +240,85 @@ describe LogStash::Filters::Translate do
240
240
  end
241
241
  end
242
242
 
243
+ describe "when using a yml dictionary with code point limit" do
244
+ let(:config) do
245
+ {
246
+ "source" => "status",
247
+ "target" => "translation",
248
+ "dictionary_path" => dictionary_path,
249
+ "yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
250
+ }
251
+ end
252
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
253
+ let(:event) { LogStash::Event.new("status" => "a") }
254
+
255
+ context "dictionary is over limit" do
256
+ let(:dictionary_size) { 17 }
257
+
258
+ it "raises exception" do
259
+ expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
260
+ end
261
+ end
262
+
263
+ context "dictionary is within limit" do
264
+ let(:dictionary_size) { 18 }
265
+
266
+ it "returns the exact translation" do
267
+ subject.register
268
+ subject.filter(event)
269
+ expect(event.get("translation")).to eq(1)
270
+ end
271
+ end
272
+
273
+ context "limit set to zero" do
274
+ let(:dictionary_size) { 0 }
275
+
276
+ it "raises configuration exception" do
277
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
278
+ end
279
+ end
280
+
281
+ context "limit is unset" do
282
+ let(:config) do
283
+ {
284
+ "source" => "status",
285
+ "target" => "translation",
286
+ "dictionary_path" => dictionary_path,
287
+ }
288
+ end
289
+
290
+ it "sets the limit to 128MB" do
291
+ subject.register
292
+ expect(subject.instance_variable_get(:@yaml_dictionary_code_point_limit)).to eq(134_217_728)
293
+ end
294
+ end
295
+
296
+ context "dictionary is json and limit is set" do
297
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.json") }
298
+ let(:dictionary_size) { 100 }
299
+
300
+ it "raises configuration exception" do
301
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format/)
302
+ end
303
+ end
304
+
305
+ context "dictionary is json and limit is unset" do
306
+ let(:config) do
307
+ {
308
+ "source" => "status",
309
+ "target" => "translation",
310
+ "dictionary_path" => TranslateUtil.build_fixture_path("dict.json"),
311
+ }
312
+ end
313
+
314
+ it "returns the exact translation" do
315
+ subject.register
316
+ subject.filter(event)
317
+ expect(event.get("translation")).to eq(10)
318
+ end
319
+ end
320
+ end
321
+
243
322
  context "when using a map tagged yml file" do
244
323
  let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
245
324
  let(:event) { LogStash::Event.new("status" => "six") }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-translate
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-08 00:00:00.000000000 Z
11
+ date: 2023-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -72,20 +72,34 @@ dependencies:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '1.0'
75
+ - !ruby/object:Gem::Dependency
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - "~>"
79
+ - !ruby/object:Gem::Version
80
+ version: '1.0'
81
+ name: logstash-mixin-scheduler
82
+ prerelease: false
83
+ type: :runtime
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.0'
75
89
  - !ruby/object:Gem::Dependency
76
90
  requirement: !ruby/object:Gem::Requirement
77
91
  requirements:
78
92
  - - ">="
79
93
  - !ruby/object:Gem::Version
80
- version: '0'
81
- name: rufus-scheduler
94
+ version: 5.1.0
95
+ name: psych
82
96
  prerelease: false
83
97
  type: :runtime
84
98
  version_requirements: !ruby/object:Gem::Requirement
85
99
  requirements:
86
100
  - - ">="
87
101
  - !ruby/object:Gem::Version
88
- version: '0'
102
+ version: 5.1.0
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
@@ -206,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
206
220
  - !ruby/object:Gem::Version
207
221
  version: '0'
208
222
  requirements: []
209
- rubygems_version: 3.1.6
223
+ rubygems_version: 3.2.33
210
224
  signing_key:
211
225
  specification_version: 4
212
226
  summary: Replaces field contents based on a hash or YAML file