logstash-filter-translate 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f628dd5414571a8ad48ac804294f9404b4eaa460457e15955824d8a2ff5b6809
4
- data.tar.gz: 6e762d49f1030f379fb5033c75c177f4eca134672bb446a27bb7c69821858f03
3
+ metadata.gz: 2159e15fd76b237d148128893d1ee1de04787628a72f31a889021220d24ecd68
4
+ data.tar.gz: '05348931cfb26f1f6968ebe703c43c9d4b0f744deba529c66bc4b8887805b79d'
5
5
  SHA512:
6
- metadata.gz: d8a2a50b3668fa320d10587fd903d0a47a84b5cb0416876aea7ebdfad73a0692379c4e34b60aaa3b49a6297a7deb418ce87bbcc40947a85c673af549b00bc358
7
- data.tar.gz: bc28c2a3e20d1b8f2b5d3c713978e32e5fee1ed2ea95d74af446f5a686096ee6ae7e179cf828aba5c2857f187a1449b61ab22e0d19e16fbaf5e1c8e540971471
6
+ metadata.gz: e0c5b356db73ea5815fad77e8335c4788857cf3bd9f79688152d0152fbf4e9cfd76297f0d0289a407f502a7475adf389398e2cbc9c69166337dc90dab0db1fc8
7
+ data.tar.gz: c66b52755ff2913c3e840ae08476636c07cd5cb90cebaaf9d8d128f4f0e132cb0b4b332730c2522ff79b1c9f76f2181f12063c54c8bac1adb7afa16c850ae82f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 3.4.1
2
+ - Fix the size limitation that caused YAML dictionary files larger than 3MB to fail to load [#97](https://github.com/logstash-plugins/logstash-filter-translate/pull/97)
3
+
1
4
  ## 3.4.0
2
5
  - Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)
3
6
 
data/docs/index.asciidoc CHANGED
@@ -108,6 +108,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
108
108
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
109
109
  | <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
110
110
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
111
+ | <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
111
112
  |=======================================================================
112
113
 
113
114
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -421,5 +422,15 @@ The target field you wish to populate with the translated code.
421
422
  If you set this value to the same value as `source` field, the plugin does a substitution, and
422
423
  the filter will succeed. This will clobber the old value of the source field!
423
424
 
425
+
426
+ [id="plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit"]
427
+ ===== `yaml_dictionary_code_point_limit`
428
+
429
+ * Value type is <<number,number>>
430
+ * Default value is 134217728 (128MB when each code point is 1 byte)
431
+
432
+ The maximum number of code points allowed in the YAML file specified by `dictionary_path`. Be aware that the equivalent byte limit depends on the file's encoding.
433
+ This setting applies to YAML files only. A YAML file that exceeds the limit causes an exception to be raised.
434
+
424
435
  [id="plugins-{type}s-{plugin}-common-options"]
425
436
  include::{include_path}/{type}.asciidoc[]
@@ -9,9 +9,9 @@ module LogStash module Filters module Dictionary
9
9
 
10
10
  include LogStash::Util::Loggable
11
11
 
12
- def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
12
+ def self.create(path, refresh_interval, refresh_behaviour, exact, regex, **file_type_args)
13
13
  if /\.y[a]?ml$/.match(path)
14
- instance = YamlFile.new(path, refresh_interval, exact, regex)
14
+ instance = YamlFile.new(path, refresh_interval, exact, regex, file_type_args)
15
15
  elsif path.end_with?(".json")
16
16
  instance = JsonFile.new(path, refresh_interval, exact, regex)
17
17
  elsif path.end_with?(".csv")
@@ -31,7 +31,7 @@ module LogStash module Filters module Dictionary
31
31
 
32
32
  attr_reader :dictionary, :fetch_strategy
33
33
 
34
- def initialize(path, refresh_interval, exact, regex)
34
+ def initialize(path, refresh_interval, exact, regex, **file_type_args)
35
35
  @dictionary_path = path
36
36
  @refresh_interval = refresh_interval
37
37
  @short_refresh = @refresh_interval <= 300
@@ -39,7 +39,7 @@ module LogStash module Filters module Dictionary
39
39
  @write_lock = rw_lock.writeLock
40
40
  @dictionary = Hash.new
41
41
  @update_method = method(:merge_dictionary)
42
- initialize_for_file_type
42
+ initialize_for_file_type(file_type_args)
43
43
  args = [@dictionary, rw_lock]
44
44
  klass = case
45
45
  when exact && regex then FetchStrategy::File::ExactRegex
@@ -55,7 +55,7 @@ module LogStash module Filters module Dictionary
55
55
  @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
56
56
  @update_method.call
57
57
  rescue Errno::ENOENT
58
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
58
+ logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
59
59
  rescue => e
60
60
  loading_exception(e, raise_exception)
61
61
  end
@@ -68,7 +68,7 @@ module LogStash module Filters module Dictionary
68
68
 
69
69
  protected
70
70
 
71
- def initialize_for_file_type
71
+ def initialize_for_file_type(**file_type_args)
72
72
  # sub class specific initializer
73
73
  end
74
74
 
@@ -120,7 +120,7 @@ module LogStash module Filters module Dictionary
120
120
  dfe.set_backtrace(e.backtrace)
121
121
  raise dfe
122
122
  else
123
- @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
123
+ logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
124
124
  end
125
125
  end
126
126
  end
@@ -6,9 +6,6 @@ module LogStash module Filters module Dictionary
6
6
 
7
7
  protected
8
8
 
9
- def initialize_for_file_type
10
- end
11
-
12
9
  def read_file_into_dictionary
13
10
  content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
14
11
  @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
@@ -7,18 +7,20 @@ module LogStash module Filters module Dictionary
7
7
 
8
8
  protected
9
9
 
10
- def initialize_for_file_type
10
+ def initialize_for_file_type(**file_type_args)
11
11
  @visitor = YamlVisitor.create
12
+
13
+ @parser = Psych::Parser.new(Psych::TreeBuilder.new)
14
+ @parser.code_point_limit = file_type_args[:yaml_code_point_limit]
12
15
  end
13
16
 
14
17
  def read_file_into_dictionary
15
18
  # low level YAML read that tries to create as
16
19
  # few intermediate objects as possible
17
20
  # this overwrites the value at key
18
- @visitor.accept_with_dictionary(
19
- @dictionary, Psych.parse_stream(
20
- IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
21
- ))
21
+ yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
22
+ @parser.parse(yaml_string, @dictionary_path)
23
+ @visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
22
24
  end
23
25
  end
24
26
  end end end
@@ -102,6 +102,12 @@ class Translate < LogStash::Filters::Base
102
102
  # as the original text, and the second column as the replacement.
103
103
  config :dictionary_path, :validate => :path
104
104
 
105
+ # The maximum number of code points allowed in the YAML file specified by `dictionary_path`. Be aware that the equivalent byte limit depends on the file's encoding.
106
+ # SnakeYAML 1.33 has a default limit of 3MB. A YAML file over the limit raises an exception. JSON and CSV dictionaries currently have no such limit.
107
+ # That limit can be too small for some use cases. Set a larger number in `yaml_dictionary_code_point_limit` to relax the restriction.
108
+ # The default value is 128MB, assuming each code point takes 1 byte.
109
+ config :yaml_dictionary_code_point_limit, :validate => :number
110
+
105
111
  # When using a dictionary file, this setting will indicate how frequently
106
112
  # (in seconds) logstash will check the dictionary file for updates.
107
113
  config :refresh_interval, :validate => :number, :default => 300
@@ -180,8 +186,22 @@ class Translate < LogStash::Filters::Base
180
186
  )
181
187
  end
182
188
 
189
+ # check and set yaml code point limit
190
+ # set lookup dictionary
183
191
  if @dictionary_path
184
- @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
192
+ if yaml_file?(@dictionary_path)
193
+ @yaml_dictionary_code_point_limit ||= 134_217_728
194
+
195
+ if @yaml_dictionary_code_point_limit <= 0
196
+ raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
197
+ else
198
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
199
+ end
200
+ elsif @yaml_dictionary_code_point_limit != nil
201
+ raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
202
+ else
203
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
204
+ end
185
205
  else
186
206
  @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
187
207
  end
@@ -245,5 +265,9 @@ class Translate < LogStash::Filters::Base
245
265
  @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
246
266
  end
247
267
  end # def filter
268
+
269
+ def yaml_file?(path)
270
+ /\.y[a]?ml$/.match(path)
271
+ end
248
272
  end # class LogStash::Filters::Translate
249
273
  end end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-translate'
4
- s.version = '3.4.0'
4
+ s.version = '3.4.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Replaces field contents based on a hash or YAML file"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
26
26
  s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
27
27
  s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
28
+ s.add_runtime_dependency "psych", ">= 5.1.0"
28
29
 
29
30
  s.add_development_dependency 'logstash-devutils'
30
31
  s.add_development_dependency 'rspec-sequencing'
@@ -57,9 +57,9 @@ describe LogStash::Filters::Translate do
57
57
  end
58
58
  end
59
59
  .then_after(1.2, "wait then translate again") do
60
- subject.filter(event)
61
60
  try(5) do
62
- wait(0.1).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
61
+ subject.filter(event)
62
+ wait(0.5).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
63
63
  end
64
64
  end
65
65
  .then("stop") do
@@ -88,9 +88,9 @@ describe LogStash::Filters::Translate do
88
88
  end
89
89
  end
90
90
  .then_after(1.2, "wait then translate again") do
91
- subject.filter(event)
92
91
  try(5) do
93
- wait(0.1).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
92
+ subject.filter(event)
93
+ wait(0.5).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
94
94
  end
95
95
  end
96
96
  .then("stop") do
@@ -240,6 +240,85 @@ describe LogStash::Filters::Translate do
240
240
  end
241
241
  end
242
242
 
243
+ describe "when using a yml dictionary with code point limit" do
244
+ let(:config) do
245
+ {
246
+ "source" => "status",
247
+ "target" => "translation",
248
+ "dictionary_path" => dictionary_path,
249
+ "yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
250
+ }
251
+ end
252
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
253
+ let(:event) { LogStash::Event.new("status" => "a") }
254
+
255
+ context "dictionary is over limit" do
256
+ let(:dictionary_size) { 17 }
257
+
258
+ it "raises exception" do
259
+ expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
260
+ end
261
+ end
262
+
263
+ context "dictionary is within limit" do
264
+ let(:dictionary_size) { 18 }
265
+
266
+ it "returns the exact translation" do
267
+ subject.register
268
+ subject.filter(event)
269
+ expect(event.get("translation")).to eq(1)
270
+ end
271
+ end
272
+
273
+ context "limit set to zero" do
274
+ let(:dictionary_size) { 0 }
275
+
276
+ it "raises configuration exception" do
277
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
278
+ end
279
+ end
280
+
281
+ context "limit is unset" do
282
+ let(:config) do
283
+ {
284
+ "source" => "status",
285
+ "target" => "translation",
286
+ "dictionary_path" => dictionary_path,
287
+ }
288
+ end
289
+
290
+ it "sets the limit to 128MB" do
291
+ subject.register
292
+ expect(subject.instance_variable_get(:@yaml_dictionary_code_point_limit)).to eq(134_217_728)
293
+ end
294
+ end
295
+
296
+ context "dictionary is json and limit is set" do
297
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.json") }
298
+ let(:dictionary_size) { 100 }
299
+
300
+ it "raises configuration exception" do
301
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format/)
302
+ end
303
+ end
304
+
305
+ context "dictionary is json and limit is unset" do
306
+ let(:config) do
307
+ {
308
+ "source" => "status",
309
+ "target" => "translation",
310
+ "dictionary_path" => TranslateUtil.build_fixture_path("dict.json"),
311
+ }
312
+ end
313
+
314
+ it "returns the exact translation" do
315
+ subject.register
316
+ subject.filter(event)
317
+ expect(event.get("translation")).to eq(10)
318
+ end
319
+ end
320
+ end
321
+
243
322
  context "when using a map tagged yml file" do
244
323
  let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
245
324
  let(:event) { LogStash::Event.new("status" => "six") }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-translate
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.0
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-08 00:00:00.000000000 Z
11
+ date: 2023-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -86,6 +86,20 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '1.0'
89
+ - !ruby/object:Gem::Dependency
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 5.1.0
95
+ name: psych
96
+ prerelease: false
97
+ type: :runtime
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 5.1.0
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
@@ -206,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
206
220
  - !ruby/object:Gem::Version
207
221
  version: '0'
208
222
  requirements: []
209
- rubygems_version: 3.1.6
223
+ rubygems_version: 3.2.33
210
224
  signing_key:
211
225
  specification_version: 4
212
226
  summary: Replaces field contents based on a hash or YAML file