logstash-filter-translate 3.4.0 → 3.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +11 -0
- data/lib/logstash/filters/dictionary/file.rb +7 -7
- data/lib/logstash/filters/dictionary/json_file.rb +0 -3
- data/lib/logstash/filters/dictionary/yaml_file.rb +7 -5
- data/lib/logstash/filters/translate.rb +25 -1
- data/logstash-filter-translate.gemspec +2 -1
- data/spec/filters/scheduling_spec.rb +4 -4
- data/spec/filters/translate_spec.rb +79 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2159e15fd76b237d148128893d1ee1de04787628a72f31a889021220d24ecd68
|
4
|
+
data.tar.gz: '05348931cfb26f1f6968ebe703c43c9d4b0f744deba529c66bc4b8887805b79d'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e0c5b356db73ea5815fad77e8335c4788857cf3bd9f79688152d0152fbf4e9cfd76297f0d0289a407f502a7475adf389398e2cbc9c69166337dc90dab0db1fc8
|
7
|
+
data.tar.gz: c66b52755ff2913c3e840ae08476636c07cd5cb90cebaaf9d8d128f4f0e132cb0b4b332730c2522ff79b1c9f76f2181f12063c54c8bac1adb7afa16c850ae82f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 3.4.1
|
2
|
+
- Fix the size limitation that caused YAML dictionary files larger than 3MB to fail to load [#97](https://github.com/logstash-plugins/logstash-filter-translate/pull/97)
|
3
|
+
|
1
4
|
## 3.4.0
|
2
5
|
- Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)
|
3
6
|
|
data/docs/index.asciidoc
CHANGED
@@ -108,6 +108,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
108
108
|
| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
|
109
109
|
| <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
|
110
110
|
| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
|
111
|
+
| <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
|
111
112
|
|=======================================================================
|
112
113
|
|
113
114
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -421,5 +422,15 @@ The target field you wish to populate with the translated code.
|
|
421
422
|
If you set this value to the same value as `source` field, the plugin does a substitution, and
|
422
423
|
the filter will succeed. This will clobber the old value of the source field!
|
423
424
|
|
425
|
+
|
426
|
+
[id="plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit"]
|
427
|
+
===== `yaml_dictionary_code_point_limit`
|
428
|
+
|
429
|
+
* Value type is <<number,number>>
|
430
|
+
* Default value is 134217728 (128MB for 1 byte code points)
|
431
|
+
|
432
|
+
The maximum number of code points allowed in the YAML file in `dictionary_path`. Please be aware that the corresponding byte limit depends on the encoding.
|
433
|
+
This setting applies to YAML files only. A YAML file over the limit raises an exception.
|
434
|
+
|
424
435
|
[id="plugins-{type}s-{plugin}-common-options"]
|
425
436
|
include::{include_path}/{type}.asciidoc[]
|
@@ -9,9 +9,9 @@ module LogStash module Filters module Dictionary
|
|
9
9
|
|
10
10
|
include LogStash::Util::Loggable
|
11
11
|
|
12
|
-
def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
|
12
|
+
def self.create(path, refresh_interval, refresh_behaviour, exact, regex, **file_type_args)
|
13
13
|
if /\.y[a]?ml$/.match(path)
|
14
|
-
instance = YamlFile.new(path, refresh_interval, exact, regex)
|
14
|
+
instance = YamlFile.new(path, refresh_interval, exact, regex, file_type_args)
|
15
15
|
elsif path.end_with?(".json")
|
16
16
|
instance = JsonFile.new(path, refresh_interval, exact, regex)
|
17
17
|
elsif path.end_with?(".csv")
|
@@ -31,7 +31,7 @@ module LogStash module Filters module Dictionary
|
|
31
31
|
|
32
32
|
attr_reader :dictionary, :fetch_strategy
|
33
33
|
|
34
|
-
def initialize(path, refresh_interval, exact, regex)
|
34
|
+
def initialize(path, refresh_interval, exact, regex, **file_type_args)
|
35
35
|
@dictionary_path = path
|
36
36
|
@refresh_interval = refresh_interval
|
37
37
|
@short_refresh = @refresh_interval <= 300
|
@@ -39,7 +39,7 @@ module LogStash module Filters module Dictionary
|
|
39
39
|
@write_lock = rw_lock.writeLock
|
40
40
|
@dictionary = Hash.new
|
41
41
|
@update_method = method(:merge_dictionary)
|
42
|
-
initialize_for_file_type
|
42
|
+
initialize_for_file_type(file_type_args)
|
43
43
|
args = [@dictionary, rw_lock]
|
44
44
|
klass = case
|
45
45
|
when exact && regex then FetchStrategy::File::ExactRegex
|
@@ -55,7 +55,7 @@ module LogStash module Filters module Dictionary
|
|
55
55
|
@dictionary_mtime = ::File.mtime(@dictionary_path).to_f
|
56
56
|
@update_method.call
|
57
57
|
rescue Errno::ENOENT
|
58
|
-
|
58
|
+
logger.warn("dictionary file read failure, continuing with old dictionary", :path => @dictionary_path)
|
59
59
|
rescue => e
|
60
60
|
loading_exception(e, raise_exception)
|
61
61
|
end
|
@@ -68,7 +68,7 @@ module LogStash module Filters module Dictionary
|
|
68
68
|
|
69
69
|
protected
|
70
70
|
|
71
|
-
def initialize_for_file_type
|
71
|
+
def initialize_for_file_type(**file_type_args)
|
72
72
|
# sub class specific initializer
|
73
73
|
end
|
74
74
|
|
@@ -120,7 +120,7 @@ module LogStash module Filters module Dictionary
|
|
120
120
|
dfe.set_backtrace(e.backtrace)
|
121
121
|
raise dfe
|
122
122
|
else
|
123
|
-
|
123
|
+
logger.warn("#{msg}, continuing with old dictionary", :dictionary_path => @dictionary_path)
|
124
124
|
end
|
125
125
|
end
|
126
126
|
end
|
@@ -6,9 +6,6 @@ module LogStash module Filters module Dictionary
|
|
6
6
|
|
7
7
|
protected
|
8
8
|
|
9
|
-
def initialize_for_file_type
|
10
|
-
end
|
11
|
-
|
12
9
|
def read_file_into_dictionary
|
13
10
|
content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
|
14
11
|
@dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
|
@@ -7,18 +7,20 @@ module LogStash module Filters module Dictionary
|
|
7
7
|
|
8
8
|
protected
|
9
9
|
|
10
|
-
def initialize_for_file_type
|
10
|
+
def initialize_for_file_type(**file_type_args)
|
11
11
|
@visitor = YamlVisitor.create
|
12
|
+
|
13
|
+
@parser = Psych::Parser.new(Psych::TreeBuilder.new)
|
14
|
+
@parser.code_point_limit = file_type_args[:yaml_code_point_limit]
|
12
15
|
end
|
13
16
|
|
14
17
|
def read_file_into_dictionary
|
15
18
|
# low level YAML read that tries to create as
|
16
19
|
# few intermediate objects as possible
|
17
20
|
# this overwrites the value at key
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
))
|
21
|
+
yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
|
22
|
+
@parser.parse(yaml_string, @dictionary_path)
|
23
|
+
@visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
|
22
24
|
end
|
23
25
|
end
|
24
26
|
end end end
|
@@ -102,6 +102,12 @@ class Translate < LogStash::Filters::Base
|
|
102
102
|
# as the original text, and the second column as the replacement.
|
103
103
|
config :dictionary_path, :validate => :path
|
104
104
|
|
105
|
+
# The maximum number of code points allowed in the YAML file in `dictionary_path`. Please be aware that the corresponding byte limit depends on the encoding.
|
106
|
+
# SnakeYAML 1.33 has a default limit of 3MB. A YAML file over the limit raises an exception. JSON and CSV currently have no such limit.
|
107
|
+
# That limit can be too small for some use cases. Set a bigger number in `yaml_dictionary_code_point_limit` to relax the restriction.
|
108
|
+
# The default value is 128MB for code points of size 1 byte
|
109
|
+
config :yaml_dictionary_code_point_limit, :validate => :number
|
110
|
+
|
105
111
|
# When using a dictionary file, this setting will indicate how frequently
|
106
112
|
# (in seconds) logstash will check the dictionary file for updates.
|
107
113
|
config :refresh_interval, :validate => :number, :default => 300
|
@@ -180,8 +186,22 @@ class Translate < LogStash::Filters::Base
|
|
180
186
|
)
|
181
187
|
end
|
182
188
|
|
189
|
+
# check and set yaml code point limit
|
190
|
+
# set lookup dictionary
|
183
191
|
if @dictionary_path
|
184
|
-
|
192
|
+
if yaml_file?(@dictionary_path)
|
193
|
+
@yaml_dictionary_code_point_limit ||= 134_217_728
|
194
|
+
|
195
|
+
if @yaml_dictionary_code_point_limit <= 0
|
196
|
+
raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
|
197
|
+
else
|
198
|
+
@lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
|
199
|
+
end
|
200
|
+
elsif @yaml_dictionary_code_point_limit != nil
|
201
|
+
raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
|
202
|
+
else
|
203
|
+
@lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
|
204
|
+
end
|
185
205
|
else
|
186
206
|
@lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
|
187
207
|
end
|
@@ -245,5 +265,9 @@ class Translate < LogStash::Filters::Base
|
|
245
265
|
@logger.error("Something went wrong when attempting to translate from dictionary", :exception => e, :source => @source, :event => event.to_hash)
|
246
266
|
end
|
247
267
|
end # def filter
|
268
|
+
|
269
|
+
def yaml_file?(path)
|
270
|
+
/\.y[a]?ml$/.match(path)
|
271
|
+
end
|
248
272
|
end # class LogStash::Filters::Translate
|
249
273
|
end end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-translate'
|
4
|
-
s.version = '3.4.0'
|
4
|
+
s.version = '3.4.1'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Replaces field contents based on a hash or YAML file"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
|
26
26
|
s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
|
27
27
|
s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
|
28
|
+
s.add_runtime_dependency "psych", ">= 5.1.0"
|
28
29
|
|
29
30
|
s.add_development_dependency 'logstash-devutils'
|
30
31
|
s.add_development_dependency 'rspec-sequencing'
|
@@ -57,9 +57,9 @@ describe LogStash::Filters::Translate do
|
|
57
57
|
end
|
58
58
|
end
|
59
59
|
.then_after(1.2, "wait then translate again") do
|
60
|
-
subject.filter(event)
|
61
60
|
try(5) do
|
62
|
-
|
61
|
+
subject.filter(event)
|
62
|
+
wait(0.5).for{event.get("[translation]")}.to eq("12"), "field [translation] did not eq '12'"
|
63
63
|
end
|
64
64
|
end
|
65
65
|
.then("stop") do
|
@@ -88,9 +88,9 @@ describe LogStash::Filters::Translate do
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
.then_after(1.2, "wait then translate again") do
|
91
|
-
subject.filter(event)
|
92
91
|
try(5) do
|
93
|
-
|
92
|
+
subject.filter(event)
|
93
|
+
wait(0.5).for{event.get("[translation]")}.to eq("22"), "field [translation] did not eq '22'"
|
94
94
|
end
|
95
95
|
end
|
96
96
|
.then("stop") do
|
@@ -240,6 +240,85 @@ describe LogStash::Filters::Translate do
|
|
240
240
|
end
|
241
241
|
end
|
242
242
|
|
243
|
+
describe "when using a yml dictionary with code point limit" do
|
244
|
+
let(:config) do
|
245
|
+
{
|
246
|
+
"source" => "status",
|
247
|
+
"target" => "translation",
|
248
|
+
"dictionary_path" => dictionary_path,
|
249
|
+
"yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
|
250
|
+
}
|
251
|
+
end
|
252
|
+
let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
|
253
|
+
let(:event) { LogStash::Event.new("status" => "a") }
|
254
|
+
|
255
|
+
context "dictionary is over limit" do
|
256
|
+
let(:dictionary_size) { 17 }
|
257
|
+
|
258
|
+
it "raises exception" do
|
259
|
+
expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
context "dictionary is within limit" do
|
264
|
+
let(:dictionary_size) { 18 }
|
265
|
+
|
266
|
+
it "returns the exact translation" do
|
267
|
+
subject.register
|
268
|
+
subject.filter(event)
|
269
|
+
expect(event.get("translation")).to eq(1)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
context "limit set to zero" do
|
274
|
+
let(:dictionary_size) { 0 }
|
275
|
+
|
276
|
+
it "raises configuration exception" do
|
277
|
+
expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
context "limit is unset" do
|
282
|
+
let(:config) do
|
283
|
+
{
|
284
|
+
"source" => "status",
|
285
|
+
"target" => "translation",
|
286
|
+
"dictionary_path" => dictionary_path,
|
287
|
+
}
|
288
|
+
end
|
289
|
+
|
290
|
+
it "sets the limit to 128MB" do
|
291
|
+
subject.register
|
292
|
+
expect(subject.instance_variable_get(:@yaml_dictionary_code_point_limit)).to eq(134_217_728)
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
context "dictionary is json and limit is set" do
|
297
|
+
let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.json") }
|
298
|
+
let(:dictionary_size) { 100 }
|
299
|
+
|
300
|
+
it "raises configuration exception" do
|
301
|
+
expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format/)
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
context "dictionary is json and limit is unset" do
|
306
|
+
let(:config) do
|
307
|
+
{
|
308
|
+
"source" => "status",
|
309
|
+
"target" => "translation",
|
310
|
+
"dictionary_path" => TranslateUtil.build_fixture_path("dict.json"),
|
311
|
+
}
|
312
|
+
end
|
313
|
+
|
314
|
+
it "returns the exact translation" do
|
315
|
+
subject.register
|
316
|
+
subject.filter(event)
|
317
|
+
expect(event.get("translation")).to eq(10)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
243
322
|
context "when using a map tagged yml file" do
|
244
323
|
let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
|
245
324
|
let(:event) { LogStash::Event.new("status" => "six") }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.4.0
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,6 +86,20 @@ dependencies:
|
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: '1.0'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 5.1.0
|
95
|
+
name: psych
|
96
|
+
prerelease: false
|
97
|
+
type: :runtime
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 5.1.0
|
89
103
|
- !ruby/object:Gem::Dependency
|
90
104
|
requirement: !ruby/object:Gem::Requirement
|
91
105
|
requirements:
|
@@ -206,7 +220,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
206
220
|
- !ruby/object:Gem::Version
|
207
221
|
version: '0'
|
208
222
|
requirements: []
|
209
|
-
rubygems_version: 3.
|
223
|
+
rubygems_version: 3.2.33
|
210
224
|
signing_key:
|
211
225
|
specification_version: 4
|
212
226
|
summary: Replaces field contents based on a hash or YAML file
|