logstash-filter-translate 3.4.0 → 3.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f628dd5414571a8ad48ac804294f9404b4eaa460457e15955824d8a2ff5b6809
4
- data.tar.gz: 6e762d49f1030f379fb5033c75c177f4eca134672bb446a27bb7c69821858f03
3
+ metadata.gz: 2159e15fd76b237d148128893d1ee1de04787628a72f31a889021220d24ecd68
4
+ data.tar.gz: '05348931cfb26f1f6968ebe703c43c9d4b0f744deba529c66bc4b8887805b79d'
5
5
  SHA512:
6
- metadata.gz: d8a2a50b3668fa320d10587fd903d0a47a84b5cb0416876aea7ebdfad73a0692379c4e34b60aaa3b49a6297a7deb418ce87bbcc40947a85c673af549b00bc358
7
- data.tar.gz: bc28c2a3e20d1b8f2b5d3c713978e32e5fee1ed2ea95d74af446f5a686096ee6ae7e179cf828aba5c2857f187a1449b61ab22e0d19e16fbaf5e1c8e540971471
6
+ metadata.gz: e0c5b356db73ea5815fad77e8335c4788857cf3bd9f79688152d0152fbf4e9cfd76297f0d0289a407f502a7475adf389398e2cbc9c69166337dc90dab0db1fc8
7
+ data.tar.gz: c66b52755ff2913c3e840ae08476636c07cd5cb90cebaaf9d8d128f4f0e132cb0b4b332730c2522ff79b1c9f76f2181f12063c54c8bac1adb7afa16c850ae82f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 3.4.1
2
+ - Fix the size limitation that rejected YAML dictionary files larger than 3MB [#97](https://github.com/logstash-plugins/logstash-filter-translate/pull/97)
3
+
1
4
  ## 3.4.0
2
5
  - Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)
3
6
 
data/docs/index.asciidoc CHANGED
@@ -108,6 +108,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
108
108
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
109
109
  | <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
110
110
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
111
+ | <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
111
112
  |=======================================================================
112
113
 
113
114
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -421,5 +422,15 @@ The target field you wish to populate with the translated code.
421
422
  If you set this value to the same value as `source` field, the plugin does a substitution, and
422
423
  the filter will succeed. This will clobber the old value of the source field!
423
424
 
425
+
426
+ [id="plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit"]
427
+ ===== `yaml_dictionary_code_point_limit`
428
+
429
+ * Value type is <<number,number>>
430
+ * Default value is 134217728 (128MB for 1 byte code points)
431
+
432
+ The maximum number of code points allowed in the YAML file at `dictionary_path`. Be aware that the equivalent byte limit depends on the encoding.
433
+ This setting applies to YAML files only. A YAML file over the limit raises an exception.
434
+
424
435
  [id="plugins-{type}s-{plugin}-common-options"]
425
436
  include::{include_path}/{type}.asciidoc[]
@@ -9,9 +9,9 @@ module LogStash module Filters module Dictionary
9
9
 
10
10
  include LogStash::Util::Loggable
11
11
 
12
- def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
12
+ def self.create(path, refresh_interval, refresh_behaviour, exact, regex, **file_type_args)
13
13
  if /\.y[a]?ml$/.match(path)
14
- instance = YamlFile.new(path, refresh_interval, exact, regex)
14
+ instance = YamlFile.new(path, refresh_interval, exact, regex, file_type_args)
15
15
  elsif path.end_with?(".json")
16
16
  instance = JsonFile.new(path, refresh_interval, exact, regex)
17
17
  elsif path.end_with?(".csv")
@@ -31,7 +31,7 @@ module LogStash module Filters module Dictionary
31
31
 
32
32
  attr_reader :dictionary, :fetch_strategy
33
33
 
34
- def initialize(path, refresh_interval, exact, regex)
34
+ def initialize(path, refresh_interval, exact, regex, **file_type_args)
35
35
  @dictionary_path = path
36
36
  @refresh_interval = refresh_interval
37
37
  @short_refresh = @refresh_interval <= 300
@@ -39,7 +39,7 @@ module LogStash module Filters module Dictionary
39
39
  @write_lock = rw_lock.writeLock
40
40
  @dictionary = Hash.new
41
41
  @update_method = method(:merge_dictionary)
42
- initialize_for_file_type
42
+ initialize_for_file_type(file_type_args)
43
43
  args = [@dictionary, rw_lock]
44
44
  klass = case
45
45
  when exact && regex then FetchStrategy::File::ExactRegex
@@ -55,7 +55,7 @@ module LogStash module Filters module Dictionary
55
55
  @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
56
56
  @update_method.call
57
57
  rescue Errno::ENOENT
58
- @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
58
+ logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
59
59
  rescue => e
60
60
  loading_exception(e, raise_exception)
61
61
  end
@@ -68,7 +68,7 @@ module LogStash module Filters module Dictionary
68
68
 
69
69
  protected
70
70
 
71
- def initialize_for_file_type
71
+ def initialize_for_file_type(**file_type_args)
72
72
  # sub class specific initializer
73
73
  end
74
74
 
@@ -120,7 +120,7 @@ module LogStash module Filters module Dictionary
120
120
  dfe.set_backtrace(e.backtrace)
121
121
  raise dfe
122
122
  else
123
- @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
123
+ logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
124
124
  end
125
125
  end
126
126
  end
@@ -6,9 +6,6 @@ module LogStash module Filters module Dictionary
6
6
 
7
7
  protected
8
8
 
9
- def initialize_for_file_type
10
- end
11
-
12
9
  def read_file_into_dictionary
13
10
  content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
14
11
  @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
@@ -7,18 +7,20 @@ module LogStash module Filters module Dictionary
7
7
 
8
8
  protected
9
9
 
10
- def initialize_for_file_type
10
+ def initialize_for_file_type(**file_type_args)
11
11
  @visitor = YamlVisitor.create
12
+
13
+ @parser = Psych::Parser.new(Psych::TreeBuilder.new)
14
+ @parser.code_point_limit = file_type_args[:yaml_code_point_limit]
12
15
  end
13
16
 
14
17
  def read_file_into_dictionary
15
18
  # low level YAML read that tries to create as
16
19
  # few intermediate objects as possible
17
20
  # this overwrites the value at key
18
- @visitor.accept_with_dictionary(
19
- @dictionary, Psych.parse_stream(
20
- IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
21
- ))
21
+ yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
22
+ @parser.parse(yaml_string, @dictionary_path)
23
+ @visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
22
24
  end
23
25
  end
24
26
  end end end
@@ -102,6 +102,12 @@ class Translate < LogStash::Filters::Base
102
102
  # as the original text, and the second column as the replacement.
103
103
  config :dictionary_path, :validate => :path
104
104
 
105
+ # The max amount of code points in the YAML file in `dictionary_path`. Please be aware that byte limit depends on the encoding.
106
+ # Snakeyaml 1.33 has a default limit of 3MB. A YAML file over the limit raises an exception. JSON and CSV currently have no such limit.
107
+ # The limit could be too small in some use cases. Set a bigger number in `yaml_dictionary_code_point_limit` to relax the restriction.
108
+ # The default value is 128MB for code points of size 1 byte
109
+ config :yaml_dictionary_code_point_limit, :validate => :number
110
+
105
111
  # When using a dictionary file, this setting will indicate how frequently
106
112
  # (in seconds) logstash will check the dictionary file for updates.
107
113
  config :refresh_interval, :validate => :number, :default => 300
@@ -180,8 +186,22 @@ class Translate < LogStash::Filters::Base
180
186
  )
181
187
  end
182
188
 
189
+ # check and set yaml code point limit
190
+ # set lookup dictionary
183
191
  if @dictionary_path
184
- @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
192
+ if yaml_file?(@dictionary_path)
193
+ @yaml_dictionary_code_point_limit ||= 134_217_728
194
+
195
+ if @yaml_dictionary_code_point_limit <= 0
196
+ raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
197
+ else
198
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
199
+ end
200
+ elsif @yaml_dictionary_code_point_limit != nil
201
+ raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
202
+ else
203
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
204
+ end
185
205
  else
186
206
  @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
187
207
  end
@@ -245,5 +265,9 @@ class Translate < LogStash::Filters::Base
245
265
  @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
246
266
  end
247
267
  end # def filter
268
+
269
+ def yaml_file?(path)
270
+ /\.y[a]?ml$/.match(path)
271
+ end
248
272
  end # class LogStash::Filters::Translate
249
273
  end end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-translate'
4
- s.version = '3.4.0'
4
+ s.version = '3.4.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Replaces field contents based on a hash or YAML file"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
26
26
  s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
27
27
  s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
28
+ s.add_runtime_dependency "psych", ">= 5.1.0"
28
29
 
29
30
  s.add_development_dependency 'logstash-devutils'
30
31
  s.add_development_dependency 'rspec-sequencing'
@@ -57,9 +57,9 @@ describe LogStash::Filters::Translate do
57
57
  end
58
58
  end
59
59
  .then_after(1.2, "wait then translate again") do
60
- subject.filter(event)
61
60
  try(5) do
62
- wait(0.1).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
61
+ subject.filter(event)
62
+ wait(0.5).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
63
63
  end
64
64
  end
65
65
  .then("stop") do
@@ -88,9 +88,9 @@ describe LogStash::Filters::Translate do
88
88
  end
89
89
  end
90
90
  .then_after(1.2, "wait then translate again") do
91
- subject.filter(event)
92
91
  try(5) do
93
- wait(0.1).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
92
+ subject.filter(event)
93
+ wait(0.5).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
94
94
  end
95
95
  end
96
96
  .then("stop") do
@@ -240,6 +240,85 @@ describe LogStash::Filters::Translate do
240
240
  end
241
241
  end
242
242
 
243
+ describe "when using a yml dictionary with code point limit" do
244
+ let(:config) do
245
+ {
246
+ "source" => "status",
247
+ "target" => "translation",
248
+ "dictionary_path" => dictionary_path,
249
+ "yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
250
+ }
251
+ end
252
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
253
+ let(:event) { LogStash::Event.new("status" => "a") }
254
+
255
+ context "dictionary is over limit" do
256
+ let(:dictionary_size) { 17 }
257
+
258
+ it "raises exception" do
259
+ expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
260
+ end
261
+ end
262
+
263
+ context "dictionary is within limit" do
264
+ let(:dictionary_size) { 18 }
265
+
266
+ it "returns the exact translation" do
267
+ subject.register
268
+ subject.filter(event)
269
+ expect(event.get("translation")).to eq(1)
270
+ end
271
+ end
272
+
273
+ context "limit set to zero" do
274
+ let(:dictionary_size) { 0 }
275
+
276
+ it "raises configuration exception" do
277
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
278
+ end
279
+ end
280
+
281
+ context "limit is unset" do
282
+ let(:config) do
283
+ {
284
+ "source" => "status",
285
+ "target" => "translation",
286
+ "dictionary_path" => dictionary_path,
287
+ }
288
+ end
289
+
290
+ it "sets the limit to 128MB" do
291
+ subject.register
292
+ expect(subject.instance_variable_get(:@yaml_dictionary_code_point_limit)).to eq(134_217_728)
293
+ end
294
+ end
295
+
296
+ context "dictionary is json and limit is set" do
297
+ let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.json") }
298
+ let(:dictionary_size) { 100 }
299
+
300
+ it "raises configuration exception" do
301
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format/)
302
+ end
303
+ end
304
+
305
+ context "dictionary is json and limit is unset" do
306
+ let(:config) do
307
+ {
308
+ "source" => "status",
309
+ "target" => "translation",
310
+ "dictionary_path" => TranslateUtil.build_fixture_path("dict.json"),
311
+ }
312
+ end
313
+
314
+ it "returns the exact translation" do
315
+ subject.register
316
+ subject.filter(event)
317
+ expect(event.get("translation")).to eq(10)
318
+ end
319
+ end
320
+ end
321
+
243
322
  context "when using a map tagged yml file" do
244
323
  let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
245
324
  let(:event) { LogStash::Event.new("status" => "six") }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-translate
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.0
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-08 00:00:00.000000000 Z
11
+ date: 2023-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -86,6 +86,20 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '1.0'
89
+ - !ruby/object:Gem::Dependency
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 5.1.0
95
+ name: psych
96
+ prerelease: false
97
+ type: :runtime
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 5.1.0
89
103
  - !ruby/object:Gem::Dependency
90
104
  requirement: !ruby/object:Gem::Requirement
91
105
  requirements:
@@ -206,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
206
220
  - !ruby/object:Gem::Version
207
221
  version: '0'
208
222
  requirements: []
209
- rubygems_version: 3.1.6
223
+ rubygems_version: 3.2.33
210
224
  signing_key:
211
225
  specification_version: 4
212
226
  summary: Replaces field contents based on a hash or YAML file