logstash-filter-translate 3.4.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 653e221a65b8b528a26f20f1b4c94ce74ecab0ba09e274bba23f0a605819672b
4
- data.tar.gz: fae94de89c0a5da2bb87aef35f1aca31fbaefb8717cc292b5f364a886f66e30b
3
+ metadata.gz: a657937387054b277b294032c5acb3ea34bc8255e3c0e344894ee1160d7d1fa5
4
+ data.tar.gz: 61c5fdbd1fb8e702109eb494caeffe3100a912e2fb4de98491848969dd7265f1
5
5
  SHA512:
6
- metadata.gz: 60feb1d75c7579f5b27ee4887f8eec27049f2b54c75b0c554923c958ae46d64261b53a5d494d39c3ac3194d03ceff6bbdff2a92e39e879a0c44d491221b34bd1
7
- data.tar.gz: 69c95f3a4313baf6f7bc40557ac73f173405607873dfd483310c8f62da415c9e46d2d24c52133cabe22011bd823f2029272023c6e903c02fc5328a926ef32e72
6
+ metadata.gz: 595cbd7c55aa076f4ce4818654cd02fe0c181527266cc5f808a1bf85c8ad7c6864845f12e7401779aecde04b2017e50282af5b9a137d2284077a70a21055c183
7
+ data.tar.gz: dc77c15b65f70981a77271c174c2fcc64266d8f1cd59925e31d334c8293c0a683597ee130f40213ced66b3ffd7c3ca00b359b937685f499eb4f27118ad28f512
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 3.5.0
2
+ - Introduce opt-in "yaml_load_strategy => streaming" to stream parse YAML dictionaries [#106](https://github.com/logstash-plugins/logstash-filter-translate/pull/106)
3
+
1
4
  ## 3.4.3
2
5
  - Allow YamlFile's Psych::Parser and Visitor instances to be garbage collected [#104](https://github.com/logstash-plugins/logstash-filter-translate/pull/104)
3
6
 
data/docs/index.asciidoc CHANGED
@@ -109,6 +109,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
109
109
  | <<plugins-{type}s-{plugin}-refresh_behaviour>> |<<string,string>>|No
110
110
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
111
111
  | <<plugins-{type}s-{plugin}-yaml_dictionary_code_point_limit>> |<<number,number>>|No
112
+ | <<plugins-{type}s-{plugin}-yaml_load_strategy>> |<<string,string>>, one of `["one_shot", "streaming"]`|No
112
113
  |=======================================================================
113
114
 
114
115
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -432,5 +433,21 @@ the filter will succeed. This will clobber the old value of the source field!
432
433
  The max amount of code points in the YAML file in `dictionary_path`. Please be aware that byte limit depends on the encoding.
433
434
  This setting is effective for YAML file only. YAML over the limit throws exception.
434
435
 
436
+ [id="plugins-{type}s-{plugin}-yaml_load_strategy"]
437
+ ===== `yaml_load_strategy`
438
+
439
+ * Value can be any of: `one_shot`, `streaming`
440
+ * Default value is `one_shot`
441
+
442
+ How to load and parse the YAML file. This setting defaults to `one_shot`, which loads the entire
443
+ YAML file into the parser in one go, emitting the final dictionary from the fully parsed YAML document.
444
+
445
+ Setting to `streaming` will instead instruct the parser to emit one "YAML element" at a time, constructing the dictionary
446
+ during parsing. This mode drastically reduces the amount of memory required to load or refresh the dictionary and it is also faster.
447
+
448
+ Due to underlying implementation differences, this mode only supports basic types such as arrays, maps (objects), strings, numbers, booleans and null values, and does not support tags.
449
+
450
+ If you have a lot of translate filters with large YAML documents, consider changing this setting to `streaming`.
451
+
435
452
  [id="plugins-{type}s-{plugin}-common-options"]
436
453
  include::{include_path}/{type}.asciidoc[]
@@ -0,0 +1,111 @@
1
module LogStash module Filters module Dictionary
  # Event-based reader for a YAML dictionary file.
  #
  # Instead of materialising the whole document, it pulls events from the
  # SnakeYAML Engine v2 parser (Java, reached through JRuby) and yields the
  # top-level mapping one key/value pair at a time via #each_pair.
  #
  # Only scalars, mappings and sequences are understood; any other event met
  # while reading a node raises a RuntimeError ("Unexpected event: ...").
  #
  # The underlying file reader is closed once #each_pair finishes (normally or
  # with an error), when construction fails, or when #close is called.
  class StreamingYamlDictParser
    # Plain scalars matching these patterns are converted to Integer / Float.
    INTEGER_PATTERN = /\A-?\d+\z/.freeze
    FLOAT_PATTERN = /\A-?\d+\.\d+\z/.freeze

    # @return the Java package proxy for org.snakeyaml.engine.v2
    def snakeYamlEngineV2
      Java::org.snakeyaml.engine.v2
    end

    # @return the Java package proxy holding the parser event classes
    def snakeYamlEngineV2Events
      snakeYamlEngineV2.events
    end

    # Opens +filename+ as UTF-8 and advances the parser just past the first
    # MappingStartEvent, so that #each_pair starts on the first top-level key.
    #
    # @param filename [String] path of the YAML dictionary
    # @param yaml_code_point_limit [Integer] limit handed to the parser's LoadSettings
    def initialize(filename, yaml_code_point_limit)
      ready = false
      settings = snakeYamlEngineV2.api.LoadSettings.builder
                                  .set_code_point_limit(yaml_code_point_limit)
                                  .build

      stream = Java::java.io.FileInputStream.new(filename)
      @reader = Java::java.io.InputStreamReader.new(stream, Java::java.nio.charset.StandardCharsets::UTF_8)
      stream_reader = snakeYamlEngineV2.scanner.StreamReader.new(@reader, settings)

      @parser = snakeYamlEngineV2.parser.ParserImpl.new(stream_reader, settings)

      skip_until(snakeYamlEngineV2Events.MappingStartEvent)
      ready = true
    ensure
      # `ensure` runs for Ruby and Java errors alike and lets them propagate;
      # it only makes sure a failed construction does not leak the open file.
      close unless ready
    end

    # Yields every key/value pair of the top-level mapping, in document order.
    # Nested mappings arrive as Hash and sequences as Array.
    # Without a block an Enumerator is returned.
    #
    # The file reader is closed when iteration ends, so a parser instance can
    # be iterated only once.
    #
    # @yieldparam key [Object] converted key
    # @yieldparam value [Object] converted value
    # @return [nil]
    def each_pair
      return enum_for(:each_pair) unless block_given?

      begin
        while (upcoming = peek_event) && !upcoming.is_a?(snakeYamlEngineV2Events.MappingEndEvent)
          key = parse_node
          value = parse_node
          yield(key, value)
        end
      ensure
        close
      end
    end

    # Closes the underlying file reader. Safe to call more than once.
    #
    # @return [void]
    def close
      reader = @reader
      @reader = nil
      reader&.close
    end

    private

    # Consumes and returns the next parser event.
    def next_event
      @parser.next
    end

    # Returns the next event without consuming it, or nil when the parser has
    # no events left (for instance an empty document).
    def peek_event
      @parser.peek_event if @parser.has_next
    end

    # Consumes events up to and including the first instance of +event_class+;
    # consumes everything when no such event exists.
    def skip_until(event_class)
      while @parser.has_next
        evt = @parser.next
        return if event_class === evt
      end
    end

    # Consumes one complete node (scalar, mapping or sequence) and returns its
    # Ruby representation.
    #
    # @raise [RuntimeError] on any other kind of event
    def parse_node
      event = next_event

      case event
      when snakeYamlEngineV2Events.ScalarEvent
        parse_scalar(event)
      when snakeYamlEngineV2Events.MappingStartEvent
        parse_mapping
      when snakeYamlEngineV2Events.SequenceStartEvent
        parse_sequence
      else
        raise "Unexpected event: #{event.class}"
      end
    end

    # Reads key/value nodes until the matching MappingEndEvent, which is consumed.
    def parse_mapping
      hash = {}
      while (upcoming = peek_event) && !upcoming.is_a?(snakeYamlEngineV2Events.MappingEndEvent)
        key = parse_node
        value = parse_node
        hash[key] = value
      end
      next_event # discard the MappingEndEvent
      hash
    end

    # Reads nodes until the matching SequenceEndEvent, which is consumed.
    def parse_sequence
      array = []
      while (upcoming = peek_event) && !upcoming.is_a?(snakeYamlEngineV2Events.SequenceEndEvent)
        array << parse_node
      end
      next_event # discard the SequenceEndEvent
      array
    end

    # Converts a scalar event into a Ruby value.
    #
    # Non-plain (quoted) scalars are returned verbatim, e.g. "true" stays a
    # String. Plain scalars are mapped to nil, true, false, Integer or Float
    # when they look like one, and are otherwise returned as String.
    def parse_scalar(scalar)
      value = scalar.value
      return value unless scalar.is_plain

      case value
      when 'null', '', '~' then nil
      when 'true' then true
      when 'false' then false
      when INTEGER_PATTERN then value.to_i
      when FLOAT_PATTERN then value.to_f
      else value
      end
    end
  end
end end end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require_relative "yaml_visitor"
4
+ require_relative "streaming_yaml_parser"
4
5
 
5
6
  module LogStash module Filters module Dictionary
6
7
  class YamlFile < File
@@ -9,18 +10,24 @@ module LogStash module Filters module Dictionary
9
10
 
10
11
  def initialize_for_file_type(**file_type_args)
11
12
  @yaml_code_point_limit = file_type_args[:yaml_code_point_limit]
13
+ @yaml_load_strategy = file_type_args[:yaml_load_strategy]
12
14
  end
13
15
 
14
16
  def read_file_into_dictionary
15
- visitor = YamlVisitor.create
16
- parser = Psych::Parser.new(Psych::TreeBuilder.new)
17
- parser.code_point_limit = @yaml_code_point_limit
18
- # low level YAML read that tries to create as
19
- # few intermediate objects as possible
20
- # this overwrites the value at key
21
- yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
22
- parser.parse(yaml_string, @dictionary_path)
23
- visitor.accept_with_dictionary(@dictionary, parser.handler.root)
17
+ if @yaml_load_strategy == "one_shot"
18
+ visitor = YamlVisitor.create
19
+ parser = Psych::Parser.new(Psych::TreeBuilder.new)
20
+ parser.code_point_limit = @yaml_code_point_limit
21
+ # low level YAML read that tries to create as
22
+ # few intermediate objects as possible
23
+ # this overwrites the value at key
24
+ yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
25
+ parser.parse(yaml_string, @dictionary_path)
26
+ visitor.accept_with_dictionary(@dictionary, parser.handler.root)
27
+ else # stream parse it
28
+ parser = StreamingYamlDictParser.new(@dictionary_path, @yaml_code_point_limit)
29
+ parser.each_pair {|key, value| @dictionary[key] = value }
30
+ end
24
31
  end
25
32
  end
26
33
  end end end
@@ -108,6 +108,10 @@ class Translate < LogStash::Filters::Base
108
108
  # The default value is 128MB for code points of size 1 byte
109
109
  config :yaml_dictionary_code_point_limit, :validate => :number
110
110
 
111
+ # How to load a YAML dictionary: "one_shot" (the default) loads the entire YAML into memory before generating the in-memory dictionary,
112
+ # while "streaming" gradually builds the dictionary during parsing, with little memory overhead
113
+ config :yaml_load_strategy, :validate => ["streaming", "one_shot"], :default => "one_shot"
114
+
111
115
  # When using a dictionary file, this setting will indicate how frequently
112
116
  # (in seconds) logstash will check the dictionary file for updates.
113
117
  config :refresh_interval, :validate => :number, :default => 300
@@ -195,7 +199,7 @@ class Translate < LogStash::Filters::Base
195
199
  if @yaml_dictionary_code_point_limit <= 0
196
200
  raise LogStash::ConfigurationError, "Please set a positive number in `yaml_dictionary_code_point_limit => #{@yaml_dictionary_code_point_limit}`."
197
201
  else
198
- @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit)
202
+ @lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, yaml_code_point_limit: @yaml_dictionary_code_point_limit, yaml_load_strategy: @yaml_load_strategy)
199
203
  end
200
204
  elsif @yaml_dictionary_code_point_limit != nil
201
205
  raise LogStash::ConfigurationError, "Please remove `yaml_dictionary_code_point_limit` for dictionary file in JSON or CSV format"
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-translate'
4
- s.version = '3.4.3'
4
+ s.version = '3.5.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Replaces field contents based on a hash or YAML file"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -238,6 +238,21 @@ describe LogStash::Filters::Translate do
238
238
  subject.filter(event)
239
239
  expect(event.get("translation")).to eq(1)
240
240
  end
241
+
242
+ describe "yaml_load_strategy" do
243
+ let(:one_shot_parse_filter) { subject }
244
+ let(:streaming_parse_filter) { described_class.new(config.merge("yaml_load_strategy" => 'streaming')) }
245
+
246
+ before(:each) do
247
+ subject.register
248
+ streaming_parse_filter.register
249
+ end
250
+ let(:one_shot_dictionary) { one_shot_parse_filter.lookup.dictionary }
251
+ let(:streaming_dictionary) { streaming_parse_filter.lookup.dictionary }
252
+ it "produces an equivalent dictionary for both strategies" do
253
+ expect(one_shot_dictionary).to eq(streaming_dictionary)
254
+ end
255
+ end
241
256
  end
242
257
 
243
258
  describe "when using a yml dictionary with code point limit" do
@@ -246,14 +261,16 @@ describe LogStash::Filters::Translate do
246
261
  "source" => "status",
247
262
  "target" => "translation",
248
263
  "dictionary_path" => dictionary_path,
249
- "yaml_dictionary_code_point_limit" => dictionary_size # the file is 18 bytes
264
+ "yaml_dictionary_code_point_limit" => codepoint_limit
250
265
  }
251
266
  end
252
267
  let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
268
+ let(:dictionary_size) { IO.read(dictionary_path).size }
253
269
  let(:event) { LogStash::Event.new("status" => "a") }
270
+ let(:codepoint_limit) { dictionary_size }
254
271
 
255
- context "dictionary is over limit" do
256
- let(:dictionary_size) { 17 }
272
+ context "codepoint limit under dictionary size" do
273
+ let(:codepoint_limit) { dictionary_size / 2 }
257
274
 
258
275
  it "raises exception" do
259
276
  expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
@@ -261,8 +278,6 @@ describe LogStash::Filters::Translate do
261
278
  end
262
279
 
263
280
  context "dictionary is within limit" do
264
- let(:dictionary_size) { 18 }
265
-
266
281
  it "returns the exact translation" do
267
282
  subject.register
268
283
  subject.filter(event)
@@ -1,3 +1,4 @@
1
1
  a : 1
2
2
  b : 2
3
3
  c : 3
4
+ d : { "e": [1, "hello", true, "false", "1", "1.1"] }
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-translate
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.3
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2025-07-24 00:00:00.000000000 Z
10
+ date: 2025-08-04 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
13
+ name: logstash-core-plugin-api
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - ">="
@@ -19,7 +19,6 @@ dependencies:
19
19
  - - "<="
20
20
  - !ruby/object:Gem::Version
21
21
  version: '2.99'
22
- name: logstash-core-plugin-api
23
22
  type: :runtime
24
23
  prerelease: false
25
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -31,12 +30,12 @@ dependencies:
31
30
  - !ruby/object:Gem::Version
32
31
  version: '2.99'
33
32
  - !ruby/object:Gem::Dependency
33
+ name: logstash-mixin-ecs_compatibility_support
34
34
  requirement: !ruby/object:Gem::Requirement
35
35
  requirements:
36
36
  - - "~>"
37
37
  - !ruby/object:Gem::Version
38
38
  version: '1.2'
39
- name: logstash-mixin-ecs_compatibility_support
40
39
  type: :runtime
41
40
  prerelease: false
42
41
  version_requirements: !ruby/object:Gem::Requirement
@@ -45,12 +44,12 @@ dependencies:
45
44
  - !ruby/object:Gem::Version
46
45
  version: '1.2'
47
46
  - !ruby/object:Gem::Dependency
47
+ name: logstash-mixin-validator_support
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  requirements:
50
50
  - - "~>"
51
51
  - !ruby/object:Gem::Version
52
52
  version: '1.0'
53
- name: logstash-mixin-validator_support
54
53
  type: :runtime
55
54
  prerelease: false
56
55
  version_requirements: !ruby/object:Gem::Requirement
@@ -59,12 +58,12 @@ dependencies:
59
58
  - !ruby/object:Gem::Version
60
59
  version: '1.0'
61
60
  - !ruby/object:Gem::Dependency
61
+ name: logstash-mixin-deprecation_logger_support
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
64
  - - "~>"
65
65
  - !ruby/object:Gem::Version
66
66
  version: '1.0'
67
- name: logstash-mixin-deprecation_logger_support
68
67
  type: :runtime
69
68
  prerelease: false
70
69
  version_requirements: !ruby/object:Gem::Requirement
@@ -73,12 +72,12 @@ dependencies:
73
72
  - !ruby/object:Gem::Version
74
73
  version: '1.0'
75
74
  - !ruby/object:Gem::Dependency
75
+ name: logstash-mixin-scheduler
76
76
  requirement: !ruby/object:Gem::Requirement
77
77
  requirements:
78
78
  - - "~>"
79
79
  - !ruby/object:Gem::Version
80
80
  version: '1.0'
81
- name: logstash-mixin-scheduler
82
81
  type: :runtime
83
82
  prerelease: false
84
83
  version_requirements: !ruby/object:Gem::Requirement
@@ -87,12 +86,12 @@ dependencies:
87
86
  - !ruby/object:Gem::Version
88
87
  version: '1.0'
89
88
  - !ruby/object:Gem::Dependency
89
+ name: psych
90
90
  requirement: !ruby/object:Gem::Requirement
91
91
  requirements:
92
92
  - - ">="
93
93
  - !ruby/object:Gem::Version
94
94
  version: 5.1.0
95
- name: psych
96
95
  type: :runtime
97
96
  prerelease: false
98
97
  version_requirements: !ruby/object:Gem::Requirement
@@ -101,12 +100,12 @@ dependencies:
101
100
  - !ruby/object:Gem::Version
102
101
  version: 5.1.0
103
102
  - !ruby/object:Gem::Dependency
103
+ name: logstash-devutils
104
104
  requirement: !ruby/object:Gem::Requirement
105
105
  requirements:
106
106
  - - ">="
107
107
  - !ruby/object:Gem::Version
108
108
  version: '0'
109
- name: logstash-devutils
110
109
  type: :development
111
110
  prerelease: false
112
111
  version_requirements: !ruby/object:Gem::Requirement
@@ -115,12 +114,12 @@ dependencies:
115
114
  - !ruby/object:Gem::Version
116
115
  version: '0'
117
116
  - !ruby/object:Gem::Dependency
117
+ name: rspec-sequencing
118
118
  requirement: !ruby/object:Gem::Requirement
119
119
  requirements:
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
122
  version: '0'
123
- name: rspec-sequencing
124
123
  type: :development
125
124
  prerelease: false
126
125
  version_requirements: !ruby/object:Gem::Requirement
@@ -129,12 +128,12 @@ dependencies:
129
128
  - !ruby/object:Gem::Version
130
129
  version: '0'
131
130
  - !ruby/object:Gem::Dependency
131
+ name: rspec-wait
132
132
  requirement: !ruby/object:Gem::Requirement
133
133
  requirements:
134
134
  - - ">="
135
135
  - !ruby/object:Gem::Version
136
136
  version: '0'
137
- name: rspec-wait
138
137
  type: :development
139
138
  prerelease: false
140
139
  version_requirements: !ruby/object:Gem::Requirement
@@ -143,12 +142,12 @@ dependencies:
143
142
  - !ruby/object:Gem::Version
144
143
  version: '0'
145
144
  - !ruby/object:Gem::Dependency
145
+ name: benchmark-ips
146
146
  requirement: !ruby/object:Gem::Requirement
147
147
  requirements:
148
148
  - - ">="
149
149
  - !ruby/object:Gem::Version
150
150
  version: '0'
151
- name: benchmark-ips
152
151
  type: :development
153
152
  prerelease: false
154
153
  version_requirements: !ruby/object:Gem::Requirement
@@ -177,6 +176,7 @@ files:
177
176
  - lib/logstash/filters/dictionary/file.rb
178
177
  - lib/logstash/filters/dictionary/json_file.rb
179
178
  - lib/logstash/filters/dictionary/memory.rb
179
+ - lib/logstash/filters/dictionary/streaming_yaml_parser.rb
180
180
  - lib/logstash/filters/dictionary/yaml_file.rb
181
181
  - lib/logstash/filters/dictionary/yaml_visitor.rb
182
182
  - lib/logstash/filters/fetch_strategy/file.rb
@@ -205,7 +205,6 @@ licenses:
205
205
  metadata:
206
206
  logstash_plugin: 'true'
207
207
  logstash_group: filter
208
- post_install_message:
209
208
  rdoc_options: []
210
209
  require_paths:
211
210
  - lib
@@ -220,8 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
220
219
  - !ruby/object:Gem::Version
221
220
  version: '0'
222
221
  requirements: []
223
- rubygems_version: 3.3.26
224
- signing_key:
222
+ rubygems_version: 3.6.3
225
223
  specification_version: 4
226
224
  summary: Replaces field contents based on a hash or YAML file
227
225
  test_files: