logstash-input-elasticsearch 4.17.0 → 4.17.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 43e3c8e44ba8bd1ce4065a1410b26a63d621180d0354b4b4aa8f3891962d9986
-   data.tar.gz: c9f6d3b53d1bb90c9ad77eab86b92818128f310aa95a80e2908bdc5b981a780f
+   metadata.gz: 9cdbda91c48e11353b49426f1d9c01e461698e507506563848045560d05421d0
+   data.tar.gz: 367351221d9028775410a4ef272de5bf6bad66ab82d91aedacb91ef462c5ff56
  SHA512:
-   metadata.gz: 5799d6ef93349a2ca9a6408bba0bbaa78f5c985137a4d126c70efe53122b7eac4d1fcda48aeb51f629651ea04e4457e9bb9705b760744b0ff22cd462f617c7b8
-   data.tar.gz: f0057cebd50dc79156d8f05be48341f009366fc9057df0f61d50327a3cecf0a95ebccd05bed120173ab429caab295d5b71f25edc04232fd36ecf862ad58bd974
+   metadata.gz: 88071a99d4ee00c356c2729cdb0d873e7b0ecb067f02e3ccc76c83ec86166d4b4c1daa036d6943a5913339b524e69ecf3f741d17cd871cdba8cf19f59a371016
+   data.tar.gz: 1b20527dd5f3f3ca00bed248b6cdb57dc81d816f56d3f3d08c393fec4fdcf37f57f8ba61dec957efe03cf6f8695426ef544780e32d7b6818671ae0f0d6930b90
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
+ ## 4.17.1
+   - Fix: scroll slice high memory consumption [#189](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/189)
+
  ## 4.17.0
    - Added SSL settings for: [#185](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/185)
      - `ssl_enabled`: Enable/disable the SSL settings. If not provided, the value is inferred from the hosts scheme
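To make the 4.17.1 fix concrete: before this release, every slice buffered its full result set in memory and events reached the queue only if all slices succeeded; now each slice streams events into the queue as pages arrive. Here is a minimal, hypothetical sketch of that before/after shape (fake page data, not the plugin's code; the real change is in the `lib/` hunks below):

```ruby
pages = { 0 => [%w[a b], %w[c]], 1 => [%w[d], %w[e]] } # fake scroll pages per slice
queue = Queue.new # Ruby's Queue is thread-safe, so slices may push concurrently

# 4.17.0 shape: buffer every slice's full result set, enqueue only at the very end
buffered = pages.values.map(&:flatten)           # whole result set held in memory
buffered.flatten.each { |event| queue << event }

# 4.17.1 shape: enqueue page by page, nothing accumulates per slice
pages.each_value do |slice_pages|
  slice_pages.each { |page| page.each { |event| queue << event } }
end
```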
data/docs/index.asciidoc CHANGED
@@ -353,6 +353,7 @@ documents and/or the <<plugins-{type}s-{plugin}-size>> has been specified as a l
  The number of times to re-run the query after the first failure. If the query fails after all retries, it logs an error message.
  The default is 0 (no retry). This value should be equal to or greater than zero.
 
+ NOTE: Partial failures, such as errors in a subset of all slices, can result in the entire query being retried, which can lead to duplication of data. Avoiding this would require Logstash to store the entire result set of a query in memory, which is often not possible.
 
  [id="plugins-{type}s-{plugin}-schedule"]
  ===== `schedule`
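The retry-duplication caveat in the NOTE above is easy to reproduce in miniature. A hypothetical sketch using a plain `retry` loop (standing in for the plugin's Stud-based `retryable` wrapper): events enqueued before the failure are enqueued again on the second attempt.

```ruby
queue = Queue.new
attempts = 0

begin
  attempts += 1
  queue << "event-1"            # first page succeeds and is enqueued
  raise "boom" if attempts == 1 # a later page fails on the first attempt
  queue << "event-2"
rescue
  retry if attempts <= 1        # conceptually what `retries => 1` does
end

queue.size # => 3, because "event-1" was enqueued twice
```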
data/lib/logstash/inputs/elasticsearch.rb CHANGED
@@ -256,6 +256,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
    # config :ca_trusted_fingerprint, :validate => :sha_256_hex
    include LogStash::PluginMixins::CATrustedFingerprintSupport
 
+   attr_reader :pipeline_id
+
    def initialize(params={})
      super(params)
 
@@ -267,6 +269,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
    def register
      require "rufus/scheduler"
 
+     @pipeline_id = execution_context&.pipeline_id || 'main'
+
      fill_hosts_from_cloud_id
      setup_ssl_params!
 
@@ -326,20 +330,22 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
    def do_run(output_queue)
      # if configured to run a single slice, don't bother spinning up threads
      if @slices.nil? || @slices <= 1
-       success, events = retryable_slice
-       success && events.each { |event| output_queue << event }
-       return
+       return retryable(JOB_NAME) do
+         do_run_slice(output_queue)
+       end
      end
 
      logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
 
-     slice_results = parallel_slice # array of tuple(ok, events)
 
-     # insert events to queue if all slices success
-     if slice_results.all?(&:first)
-       slice_results.flat_map { |success, events| events }
-                    .each { |event| output_queue << event }
-     end
+     @slices.times.map do |slice_id|
+       Thread.new do
+         LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
+         retryable(JOB_NAME) do
+           do_run_slice(output_queue, slice_id)
+         end
+       end
+     end.map(&:join)
 
      logger.trace("#{@slices} slices completed")
    end
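The new `do_run` above replaces the collect-then-flush logic with one thread per slice writing straight into `output_queue`. To see the pattern in isolation, here is a runnable toy version with `do_run_slice` replaced by a stub; since `Queue` is thread-safe, the slices need no extra locking:

```ruby
queue  = Queue.new
slices = 2

slices.times.map do |slice_id|
  Thread.new do
    Thread.current.name = "slice_#{slice_id}" # stands in for LogStash::Util::set_thread_name
    3.times { |n| queue << "slice#{slice_id}-event#{n}" } # stands in for do_run_slice(queue, slice_id)
  end
end.map(&:join)

queue.size # => 6, regardless of which slice finished first
```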
@@ -347,42 +353,14 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
    def retryable(job_name, &block)
      begin
        stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
-       output = stud_try.try((@retries + 1).times) { yield }
-       [true, output]
+       stud_try.try((@retries + 1).times) { yield }
      rescue => e
        error_details = {:message => e.message, :cause => e.cause}
        error_details[:backtrace] = e.backtrace if logger.debug?
        logger.error("Tried #{job_name} unsuccessfully", error_details)
-       [false, nil]
      end
    end
 
-
-   # @return [(ok, events)] : Array of tuple(Boolean, [Logstash::Event])
-   def parallel_slice
-     pipeline_id = execution_context&.pipeline_id || 'main'
-     @slices.times.map do |slice_id|
-       Thread.new do
-         LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
-         retryable_slice(slice_id)
-       end
-     end.map do |t|
-       t.join
-       t.value
-     end
-   end
-
-   # @param scroll_id [Integer]
-   # @return (ok, events) [Boolean, Array(Logstash::Event)]
-   def retryable_slice(slice_id=nil)
-     retryable(JOB_NAME) do
-       output = []
-       do_run_slice(output, slice_id)
-       output
-     end
-   end
-
-
    def do_run_slice(output_queue, slice_id=nil)
      slice_query = @base_query
      slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
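Reassembled from the context and `+` lines of the hunk above, the surviving helper now reads:

```ruby
def retryable(job_name, &block)
  begin
    stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
    stud_try.try((@retries + 1).times) { yield }
  rescue => e
    error_details = {:message => e.message, :cause => e.cause}
    error_details[:backtrace] = e.backtrace if logger.debug?
    logger.error("Tried #{job_name} unsuccessfully", error_details)
  end
end
```

Callers no longer receive an `[ok, events]` tuple: events are streamed into the queue inside the block, so there is nothing to return, and an exhausted retry simply logs the error. The `parallel_slice` and `retryable_slice` helpers, which existed only to shuttle those tuples around, are deleted outright.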
data/logstash-input-elasticsearch.gemspec CHANGED
@@ -1,7 +1,7 @@
  Gem::Specification.new do |s|
 
    s.name = 'logstash-input-elasticsearch'
-   s.version = '4.17.0'
+   s.version = '4.17.1'
    s.licenses = ['Apache License (2.0)']
    s.summary = "Reads query results from an Elasticsearch cluster"
    s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/elasticsearch_spec.rb CHANGED
@@ -204,7 +204,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
    context 'with `slices => 1`' do
      let(:slices) { 1 }
      it 'runs just one slice' do
-       expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil)
+       expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
        expect(Thread).to_not receive(:new)
 
        plugin.register
@@ -215,7 +215,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
    context 'without slices directive' do
      let(:config) { super().tap { |h| h.delete('slices') } }
      it 'runs just one slice' do
-       expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil)
+       expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
        expect(Thread).to_not receive(:new)
 
        plugin.register
@@ -414,18 +414,19 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
    expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
    plugin.register
 
-   expect(client).to receive(:clear_scroll).and_return(nil)
+   expect(client).to receive(:clear_scroll).twice.and_return(nil)
 
-   # SLICE0 is a three-page scroll in which the second page throw exception
+   # SLICE0 is a three-page scroll
    slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2}))
    expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0)
-   expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_raise("boom")
+   expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1)
+   expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2)
    allow(client).to receive(:ping)
 
-   # SLICE1 is a two-page scroll in which the last page has no next scroll id
+   # SLICE1 is a two-page scroll in which the last page throws exception
    slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2}))
    expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
-   expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1)
+   expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom")
 
    synchronize_method!(plugin, :scroll_request)
    synchronize_method!(plugin, :search_request)
@@ -433,18 +434,22 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
 
    let(:client) { Elasticsearch::Client.new }
 
-   it 'does not insert event to queue' do
-     expect(plugin).to receive(:parallel_slice).and_wrap_original do |m, *args|
-       slice0, slice1 = m.call
-       expect(slice0[0]).to be_falsey
-       expect(slice1[0]).to be_truthy
-       expect(slice1[1].size).to eq(4) # four items from SLICE1
-       [slice0, slice1]
+   it 'insert event to queue without waiting other slices' do
+     expect(plugin).to receive(:do_run_slice).twice.and_wrap_original do |m, *args|
+       q = args[0]
+       slice_id = args[1]
+       if slice_id == 0
+         m.call(*args)
+         expect(q.size).to eq(3)
+       else
+         sleep(1)
+         m.call(*args)
+       end
      end
 
      queue = Queue.new
      plugin.run(queue)
-     expect(queue.size).to eq(0)
+     expect(queue.size).to eq(5)
    end
  end
  end
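The arithmetic behind the new expectation: slice 0 streams its three events into the queue (`q.size` is asserted to be 3 while slice 1 is still sleeping), and slice 1 presumably contributes the two hits from its first page before its second page raises, giving the final `queue.size` of 5. Under the old all-or-nothing behaviour the same failure left the queue empty, hence the previous `eq(0)`.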
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-elasticsearch
  version: !ruby/object:Gem::Version
-   version: 4.17.0
+   version: 4.17.1
  platform: ruby
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2023-03-10 00:00:00.000000000 Z
+ date: 2023-04-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement