logstash-filter-translate 3.3.1 → 3.4.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 77bd945dee5e20dbc7eb6837c655e807373ee8923dcbd498c9fcf5b1065c6661
-  data.tar.gz: 878df6d8dfcc5fa53bb617163b0ddfe6feeb8e876aed9668b337f66dc8472221
+  metadata.gz: 2159e15fd76b237d148128893d1ee1de04787628a72f31a889021220d24ecd68
+  data.tar.gz: '05348931cfb26f1f6968ebe703c43c9d4b0f744deba529c66bc4b8887805b79d'
 SHA512:
-  metadata.gz: d45ec30865d73884f9564f3680fdc1fb230c9fa71507c86d475e6ec0759d13863080f8c237ab34b38ccad08bca0781ab1d8a8523955841db2d0a0334091a9cf3
-  data.tar.gz: 443284f8db270ec6081a2017bd6597733be886643232d97a7630c99e1b194970183e0a7d070ed511bdb1d8069e7f929a991d608798a35d8bd5408878806945d5
+  metadata.gz: e0c5b356db73ea5815fad77e8335c4788857cf3bd9f79688152d0152fbf4e9cfd76297f0d0289a407f502a7475adf389398e2cbc9c69166337dc90dab0db1fc8
+  data.tar.gz: c66b52755ff2913c3e840ae08476636c07cd5cb90cebaaf9d8d128f4f0e132cb0b4b332730c2522ff79b1c9f76f2181f12063c54c8bac1adb7afa16c850ae82f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+## 3.4.1
+- Fix the limitation of the size of yaml file that exceeds 3MB [#97](https://github.com/logstash-plugins/logstash-filter-translate/pull/97)
+
+## 3.4.0
+- Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)
+
 ## 3.3.1
 - Refactor: reading .csv for JRuby 9.3 compatibility [#94](https://github.com/logstash-plugins/logstash-filter-translate/pull/94)
 
data/docs/index.asciidoc CHANGED
@@ -108,6 +108,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
 | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
 | <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
 |=======================================================================
 
 Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -421,5 +422,15 @@ The target field you wish to populate with the translated code.
 If you set this value to the same value as `source` field, the plugin does a substitution, and
 the filter will succeed. This will clobber the old value of the source field!
 
+
+[id="plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit"]
+===== `yaml_dictionary_code_point_limit`
+
+* Value type is <<number,number>>
+* Default value is 134217728 (128MB for 1-byte code points)
+
+The maximum number of code points allowed in the YAML file at `dictionary_path`. Be aware that the byte limit depends on the encoding.
+This setting applies to YAML files only; a YAML file over the limit raises an exception.
+
 [id="plugins-{type}s-{plugin}-common-options"]
 include::{include_path}/{type}.asciidoc[]
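
The new `yaml_dictionary_code_point_limit` option only matters for YAML dictionaries. For reference, a minimal pipeline snippet using it might look like the sketch below (not taken from the gem; the dictionary path and the limit value are hypothetical, and the other options shown are the existing `source`, `target`, and `dictionary_path` settings):

filter {
  translate {
    source          => "status"
    target          => "translation"
    dictionary_path => "/etc/logstash/dictionaries/status.yml"  # hypothetical path
    # Allow YAML dictionaries larger than SnakeYAML's 3MB default; the value counts code points.
    yaml_dictionary_code_point_limit => 268435456
  }
}

Setting the option alongside a JSON or CSV dictionary is rejected at register time, as the translate.rb changes further below show.
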
data/lib/logstash/filters/dictionary/file.rb CHANGED
@@ -1,18 +1,17 @@
 # encoding: utf-8
-require 'concurrent/atomic/atomic_boolean'
-require 'rufus-scheduler'
 require "logstash/util/loggable"
 require "logstash/filters/fetch_strategy/file"
 
-java_import 'java.util.concurrent.locks.ReentrantReadWriteLock'
-
 module LogStash module Filters module Dictionary
   class DictionaryFileError < StandardError; end
 
   class File
-    def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
+
+    include LogStash::Util::Loggable
+
+    def self.create(path, refresh_interval, refresh_behaviour, exact, regex, **file_type_args)
       if /\.y[a]?ml$/.match(path)
-        instance = YamlFile.new(path, refresh_interval, exact, regex)
+        instance = YamlFile.new(path, refresh_interval, exact, regex, file_type_args)
       elsif path.end_with?(".json")
         instance = JsonFile.new(path, refresh_interval, exact, regex)
       elsif path.end_with?(".csv")
@@ -30,19 +29,17 @@ module LogStash module Filters module Dictionary
       end
     end
 
-    include LogStash::Util::Loggable
     attr_reader :dictionary, :fetch_strategy
 
-    def initialize(path, refresh_interval, exact, regex)
+    def initialize(path, refresh_interval, exact, regex, **file_type_args)
      @dictionary_path = path
      @refresh_interval = refresh_interval
      @short_refresh = @refresh_interval <= 300
-     @stopping = Concurrent::AtomicBoolean.new # ported from jdbc_static, need a way to prevent a scheduled execution from running a load.
      rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
      @write_lock = rw_lock.writeLock
      @dictionary = Hash.new
      @update_method = method(:merge_dictionary)
-     initialize_for_file_type
+     initialize_for_file_type(file_type_args)
      args = [@dictionary, rw_lock]
      klass = case
              when exact && regex then FetchStrategy::File::ExactRegex
@@ -51,13 +48,6 @@ module LogStash module Filters module Dictionary
              end
      @fetch_strategy = klass.new(*args)
      load_dictionary(raise_exception = true)
-     stop_scheduler(initial = true)
-     start_scheduler unless @refresh_interval <= 0 # disabled, a scheduler interval of zero makes no sense
-    end
-
-    def stop_scheduler(initial = false)
-      @stopping.make_true unless initial
-      @scheduler.shutdown(:wait) if @scheduler
     end
 
     def load_dictionary(raise_exception=false)
@@ -65,7 +55,7 @@ module LogStash module Filters module Dictionary
      @dictionary_mtime = ::File.mtime(@dictionary_path).to_f
      @update_method.call
     rescue Errno::ENOENT
-      @logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
+      logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
     rescue => e
       loading_exception(e, raise_exception)
     end
@@ -78,7 +68,7 @@ module LogStash module Filters module Dictionary
 
     protected
 
-    def initialize_for_file_type
+    def initialize_for_file_type(**file_type_args)
       # sub class specific initializer
     end
 
@@ -88,13 +78,6 @@ module LogStash module Filters module Dictionary
 
     private
 
-    def start_scheduler
-      @scheduler = Rufus::Scheduler.new
-      @scheduler.interval("#{@refresh_interval}s", :overlap => false) do
-        reload_dictionary
-      end
-    end
-
     def merge_dictionary
       @write_lock.lock
       begin
@@ -116,14 +99,15 @@ module LogStash module Filters module Dictionary
       end
     end
 
+    # scheduler executes this method, periodically
     def reload_dictionary
-      return if @stopping.true?
       if @short_refresh
         load_dictionary if needs_refresh?
       else
         load_dictionary
       end
     end
+    public :reload_dictionary
 
     def needs_refresh?
       @dictionary_mtime != ::File.mtime(@dictionary_path).to_f
@@ -136,7 +120,7 @@ module LogStash module Filters module Dictionary
         dfe.set_backtrace(e.backtrace)
         raise dfe
       else
-        @logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
+        logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
       end
     end
   end
data/lib/logstash/filters/dictionary/json_file.rb CHANGED
@@ -6,9 +6,6 @@ module LogStash module Filters module Dictionary
 
     protected
 
-    def initialize_for_file_type
-    end
-
     def read_file_into_dictionary
       content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
       @dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
data/lib/logstash/filters/dictionary/yaml_file.rb CHANGED
@@ -7,18 +7,20 @@ module LogStash module Filters module Dictionary
 
     protected
 
-    def initialize_for_file_type
+    def initialize_for_file_type(**file_type_args)
       @visitor = YamlVisitor.create
+
+      @parser = Psych::Parser.new(Psych::TreeBuilder.new)
+      @parser.code_point_limit = file_type_args[:yaml_code_point_limit]
     end
 
     def read_file_into_dictionary
       # low level YAML read that tries to create as
       # few intermediate objects as possible
       # this overwrites the value at key
-      @visitor.accept_with_dictionary(
-        @dictionary, Psych.parse_stream(
-          IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
-      ))
+      yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
+      @parser.parse(yaml_string, @dictionary_path)
+      @visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
     end
   end
 end end end
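
The rewritten reader drives Psych's low-level parser directly, so SnakeYAML's code point limit can be raised per parse instead of hitting the 3MB default. A minimal standalone sketch of the same flow, assuming the JRuby build of psych >= 5.1.0 bundled with Logstash (where `Psych::Parser#code_point_limit=` is available; the C implementation has no such limit) and a hypothetical `dict.yml`:

# Minimal sketch, not part of the gem: mirror the plugin's low-level YAML read.
require 'psych'

yaml_path = 'dict.yml'                              # hypothetical dictionary file
parser = Psych::Parser.new(Psych::TreeBuilder.new)  # TreeBuilder collects the parsed node tree
parser.code_point_limit = 1_000_000                 # raise SnakeYAML's 3MB default (JRuby psych >= 5.1.0)

yaml_string = IO.read(yaml_path, :mode => 'r:bom|utf-8')
parser.parse(yaml_string, yaml_path)                # second argument is only used in error messages

# parser.handler.root is a Psych::Nodes::Stream; the plugin walks it with its
# YamlVisitor, whereas here we simply convert it to plain Ruby objects.
documents = parser.handler.root.to_ruby
puts documents.first.inspect                        # e.g. {"a"=>1, "b"=>2}
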
data/lib/logstash/filters/translate.rb CHANGED
@@ -4,6 +4,7 @@ require "logstash/namespace"
 require 'logstash/plugin_mixins/ecs_compatibility_support'
 require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter'
 require 'logstash/plugin_mixins/deprecation_logger_support'
+require 'logstash/plugin_mixins/scheduler'
 
 require "logstash/filters/dictionary/memory"
 require "logstash/filters/dictionary/file"
@@ -44,6 +45,8 @@ class Translate < LogStash::Filters::Base
 
   extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter
 
+  include LogStash::PluginMixins::Scheduler
+
   config_name "translate"
 
   # The name of the logstash event field containing the value to be compared for a
@@ -99,6 +102,12 @@ class Translate < LogStash::Filters::Base
   # as the original text, and the second column as the replacement.
   config :dictionary_path, :validate => :path
 
+  # The maximum number of code points allowed in the YAML file at `dictionary_path`. Be aware that the byte limit depends on the encoding.
+  # SnakeYAML 1.33 has a default limit of 3MB; a YAML file over the limit raises an exception. JSON and CSV currently have no such limit.
+  # If the limit is too small for your use case, set a bigger number in `yaml_dictionary_code_point_limit` to relax the restriction.
+  # The default value is 128MB for 1-byte code points.
+  config :yaml_dictionary_code_point_limit, :validate => :number
+
   # When using a dictionary file, this setting will indicate how frequently
   # (in seconds) logstash will check the dictionary file for updates.
   config :refresh_interval, :validate => :number, :default => 300
@@ -177,8 +186,22 @@ class Translate < LogStash::Filters::Base
       )
     end
 
+    # check and set yaml code point limit
+    # set lookup dictionary
     if @dictionary_path
-      @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
+      if yaml_file?(@dictionary_path)
+        @yaml_dictionary_code_point_limit ||= 134_217_728
+
+        if @yaml_dictionary_code_point_limit <= 0
+          raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
+        else
+          @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
+        end
+      elsif @yaml_dictionary_code_point_limit != nil
+        raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
+      else
+        @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
+      end
     else
       @lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
     end
@@ -228,11 +251,11 @@ class Translate < LogStash::Filters::Base
     else
       @logger.debug? && @logger.debug("#{self.class.name}: Dictionary translation method - Fuzzy")
     end
-  end # def register
 
-  def close
-    @lookup.stop_scheduler
-  end
+    if @lookup.respond_to?(:reload_dictionary) && @refresh_interval > 0 # a scheduler interval of zero makes no sense
+      scheduler.interval("#{@refresh_interval}s", overlap: false) { @lookup.reload_dictionary }
+    end
+  end # def register
 
   def filter(event)
     return unless @updater.test_for_inclusion(event, @override)
  def filter(event)
238
261
  return unless @updater.test_for_inclusion(event, @override)
@@ -242,5 +265,9 @@ class Translate < LogStash::Filters::Base
242
265
  @logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
243
266
  end
244
267
  end # def filter
268
+
269
+ def yaml_file?(path)
270
+ /\.y[a]?ml$/.match(path)
271
+ end
245
272
  end # class LogStash::Filters::Translate
246
273
  end end
data/logstash-filter-translate.gemspec CHANGED
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-filter-translate'
-  s.version = '3.3.1'
+  s.version = '3.4.1'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Replaces field contents based on a hash or YAML file"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -24,7 +24,8 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.2'
   s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
   s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
-  s.add_runtime_dependency 'rufus-scheduler'
+  s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
+  s.add_runtime_dependency "psych", ">= 5.1.0"
 
   s.add_development_dependency 'logstash-devutils'
   s.add_development_dependency 'rspec-sequencing'
data/spec/filters/translate_spec.rb CHANGED
@@ -57,9 +57,9 @@ describe LogStash::Filters::Translate do
         end
       end
       .then_after(1.2, "wait then translate again") do
-        subject.filter(event)
         try(5) do
-          wait(0.1).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
+          subject.filter(event)
+          wait(0.5).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
         end
       end
       .then("stop") do
@@ -88,9 +88,9 @@ describe LogStash::Filters::Translate do
         end
       end
       .then_after(1.2, "wait then translate again") do
-        subject.filter(event)
         try(5) do
-          wait(0.1).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
+          subject.filter(event)
+          wait(0.5).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
         end
       end
       .then("stop") do
@@ -240,6 +240,85 @@ describe LogStash::Filters::Translate do
     end
   end
 
+  describe "when using a yml dictionary with code point limit" do
+    let(:config) do
+      {
+        "source" => "status",
+        "target" => "translation",
+        "dictionary_path" => dictionary_path,
+        "yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
+      }
+    end
+    let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
+    let(:event) { LogStash::Event.new("status" => "a") }
+
+    context "dictionary is over limit" do
+      let(:dictionary_size) { 17 }
+
+      it "raises exception" do
+        expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
+      end
+    end
+
+    context "dictionary is within limit" do
+      let(:dictionary_size) { 18 }
+
+      it "returns the exact translation" do
+        subject.register
+        subject.filter(event)
+        expect(event.get("translation")).to eq(1)
+      end
+    end
+
+    context "limit set to zero" do
+      let(:dictionary_size) { 0 }
+
+      it "raises configuration exception" do
+        expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
+      end
+    end
+
+    context "limit is unset" do
+      let(:config) do
+        {
+          "source" => "status",
+          "target" => "translation",
+          "dictionary_path" => dictionary_path,
+        }
+      end
+
+      it "sets the limit to 128MB" do
+        subject.register
+        expect(subject.instance_variable_get(:@yaml_dictionary_code_point_limit)).to eq(134_217_728)
+      end
+    end
+
+    context "dictionary is json and limit is set" do
+      let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.json") }
+      let(:dictionary_size) { 100 }
+
+      it "raises configuration exception" do
+        expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format/)
+      end
+    end
+
+    context "dictionary is json and limit is unset" do
+      let(:config) do
+        {
+          "source" => "status",
+          "target" => "translation",
+          "dictionary_path" => TranslateUtil.build_fixture_path("dict.json"),
+        }
+      end
+
+      it "returns the exact translation" do
+        subject.register
+        subject.filter(event)
+        expect(event.get("translation")).to eq(10)
+      end
+    end
+  end
+
   context "when using a map tagged yml file" do
     let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
     let(:event) { LogStash::Event.new("status" => "six") }
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-filter-translate
 version: !ruby/object:Gem::Version
-  version: 3.3.1
+  version: 3.4.1
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-06-08 00:00:00.000000000 Z
+date: 2023-05-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -72,20 +72,34 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.0'
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  name: logstash-mixin-scheduler
+  prerelease: false
+  type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
-  name: rufus-scheduler
+        version: 5.1.0
+  name: psych
   prerelease: false
   type: :runtime
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 5.1.0
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
@@ -206,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.6
+rubygems_version: 3.2.33
 signing_key:
 specification_version: 4
 summary: Replaces field contents based on a hash or YAML file