logstash-input-elasticsearch 4.18.0 → 4.19.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 035d372d2fbedd9c038c2ad182ea8826ce35f73dab09d7c1534dde9b019e6c82
4
- data.tar.gz: 04a65a4a915231720975f25d614d44bb9b3b8f5cdde77142178b66eb6678be1c
3
+ metadata.gz: 5d41981efcf200ca8ec3ae620594b87054b916f4aaf9dd93b58562b9a0207a95
4
+ data.tar.gz: 177f0fd263be1d0a6b14f9bd298e84e6fa440868bedd79cd189eba894b67eb08
5
5
  SHA512:
6
- metadata.gz: beee15cfed06fbee80b9ac7b3bd389e8fb0c5ca6f290a81cdb2f6389293bc32859c555a00483c3456854fb5e04ff6d64e23f272a17995f5677f544bd0916d966
7
- data.tar.gz: 7152b896548b45f1c1dbeb973e6c10efbed2e76a0c2a9082aa0c6f1e24475a04e6b4537cff1d5d2265b1ce249fb4dddfd136cbbeb5808e870ff70434a033fb43
6
+ metadata.gz: 724d042883e32d4db18e8d2ebb64bade15f2e8fd4e6e804d041265ab2fb115c4453a7751690452fd672018b0600859e2991b47eb1b1e63bebf8cb8f4c828e862
7
+ data.tar.gz: d8b1493958dcaca358883935d432f92afe80456d8e1fdcba09ea54f20c4487b381dc338a757da55a5f3c3362492d8dd4c530737573844673272b57b09e8ab09c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 4.19.1
2
+ - Plugin version bump to pick up docs fix in [#199](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/199) required to clear build error in docgen. [#200](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/200)
3
+
4
+ ## 4.19.0
5
+ - Added `search_api` option to support `search_after` and `scroll` [#198](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/198)
6
+ - The default value `auto` uses `search_after` for Elasticsearch >= 8; otherwise, it falls back to `scroll`
7
+
1
8
  ## 4.18.0
2
9
  - Added request header `Elastic-Api-Version` for serverless [#195](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/195)
3
10
 
data/docs/index.asciidoc CHANGED
@@ -118,6 +118,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
118
118
  | <<plugins-{type}s-{plugin}-request_timeout_seconds>> | <<number,number>>|No
119
119
  | <<plugins-{type}s-{plugin}-schedule>> |<<string,string>>|No
120
120
  | <<plugins-{type}s-{plugin}-scroll>> |<<string,string>>|No
121
+ | <<plugins-{type}s-{plugin}-search_api>> |<<string,string>>, one of `["auto", "search_after", "scroll"]`|No
121
122
  | <<plugins-{type}s-{plugin}-size>> |<<number,number>>|No
122
123
  | <<plugins-{type}s-{plugin}-slices>> |<<number,number>>|No
123
124
  | <<plugins-{type}s-{plugin}-ssl_certificate>> |<<path,path>>|No
@@ -333,6 +334,9 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`.
333
334
  The query to be executed. Read the {ref}/query-dsl.html[Elasticsearch query DSL
334
335
  documentation] for more information.
335
336
 
337
+ When <<plugins-{type}s-{plugin}-search_api>> resolves to `search_after` and the query does not specify `sort`,
338
+ the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter for more information.
339
+
336
340
  [id="plugins-{type}s-{plugin}-request_timeout_seconds"]
337
341
  ===== `request_timeout_seconds`
338
342
 
@@ -377,6 +381,19 @@ This parameter controls the keepalive time in seconds of the scrolling
377
381
  request and initiates the scrolling process. The timeout applies per
378
382
  round trip (i.e. between the previous scroll request, to the next).
379
383
 
384
+ [id="plugins-{type}s-{plugin}-search_api"]
385
+ ===== `search_api`
386
+
387
+ * Value can be any of: `auto`, `search_after`, `scroll`
388
+ * Default value is `auto`
389
+
390
+ With `auto` the plugin uses the `search_after` parameter for Elasticsearch version `8.0.0` or higher, otherwise the `scroll` API is used instead.
391
+
392
+ `search_after` uses {ref}/point-in-time-api.html#point-in-time-api[point in time] and sort value to search.
393
+ The query requires at least one `sort` field, as described in the <<plugins-{type}s-{plugin}-query>> parameter.
394
+
395
+ `scroll` uses {ref}/paginate-search-results.html#scroll-search-results[scroll] API to search, which is no longer recommended.
396
+
380
397
  [id="plugins-{type}s-{plugin}-size"]
381
398
  ===== `size`
382
399
 
@@ -0,0 +1,231 @@
1
+ require 'logstash/helpers/loggable_try'
2
+
3
+ module LogStash
4
+ module Inputs
5
+ class Elasticsearch
6
+ class PaginatedSearch
7
+ include LogStash::Util::Loggable
8
+
9
+ def initialize(client, plugin)
10
+ @client = client
11
+ @plugin_params = plugin.params
12
+
13
+ @index = @plugin_params["index"]
14
+ @query = LogStash::Json.load(@plugin_params["query"])
15
+ @scroll = @plugin_params["scroll"]
16
+ @size = @plugin_params["size"]
17
+ @slices = @plugin_params["slices"]
18
+ @retries = @plugin_params["retries"]
19
+
20
+ @plugin = plugin
21
+ @pipeline_id = plugin.pipeline_id
22
+ end
23
+
24
+ def do_run(output_queue)
25
+ return retryable_search(output_queue) if @slices.nil? || @slices <= 1
26
+
27
+ retryable_slice_search(output_queue)
28
+ end
29
+
30
+ def retryable(job_name, &block)
31
+ stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
32
+ stud_try.try((@retries + 1).times) { yield }
33
+ rescue => e
34
+ error_details = {:message => e.message, :cause => e.cause}
35
+ error_details[:backtrace] = e.backtrace if logger.debug?
36
+ logger.error("Tried #{job_name} unsuccessfully", error_details)
37
+ end
38
+
39
+ def retryable_search(output_queue)
40
+ raise NotImplementedError
41
+ end
42
+
43
+ def retryable_slice_search(output_queue)
44
+ raise NotImplementedError
45
+ end
46
+ end
47
+
48
+ class Scroll < PaginatedSearch
49
+ SCROLL_JOB = "scroll paginated search"
50
+
51
+ def search_options(slice_id)
52
+ query = @query
53
+ query = @query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
54
+ {
55
+ :index => @index,
56
+ :scroll => @scroll,
57
+ :size => @size,
58
+ :body => LogStash::Json.dump(query)
59
+ }
60
+ end
61
+
62
+ def initial_search(slice_id)
63
+ options = search_options(slice_id)
64
+ @client.search(options)
65
+ end
66
+
67
+ def next_page(scroll_id)
68
+ @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
69
+ end
70
+
71
+ def process_page(output_queue)
72
+ r = yield
73
+ r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) }
74
+ [r['hits']['hits'].any?, r['_scroll_id']]
75
+ end
76
+
77
+ def search(output_queue, slice_id=nil)
78
+ log_details = {}
79
+ log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil?
80
+
81
+ logger.info("Query start", log_details)
82
+ has_hits, scroll_id = process_page(output_queue) { initial_search(slice_id) }
83
+
84
+ while has_hits && scroll_id && !@plugin.stop?
85
+ logger.debug("Query progress", log_details)
86
+ has_hits, scroll_id = process_page(output_queue) { next_page(scroll_id) }
87
+ end
88
+
89
+ logger.info("Query completed", log_details)
90
+ ensure
91
+ clear(scroll_id)
92
+ end
93
+
94
+ def retryable_search(output_queue, slice_id=nil)
95
+ retryable(SCROLL_JOB) do
96
+ search(output_queue, slice_id)
97
+ end
98
+ end
99
+
100
+ def retryable_slice_search(output_queue)
101
+ logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
102
+
103
+ @slices.times.map do |slice_id|
104
+ Thread.new do
105
+ LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
106
+ retryable_search(output_queue, slice_id)
107
+ end
108
+ end.map(&:join)
109
+
110
+ logger.trace("#{@slices} slices completed")
111
+ end
112
+
113
+ def clear(scroll_id)
114
+ @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id
115
+ rescue => e
116
+ # ignore & log any clear_scroll errors
117
+ logger.debug("Ignoring clear_scroll exception", message: e.message, exception: e.class)
118
+ end
119
+ end
120
+
121
+ class SearchAfter < PaginatedSearch
122
+ PIT_JOB = "create point in time (PIT)"
123
+ SEARCH_AFTER_JOB = "search_after paginated search"
124
+
125
+ def pit?(id)
126
+ !!id&.is_a?(String)
127
+ end
128
+
129
+ def create_pit
130
+ logger.info("Create point in time (PIT)")
131
+ r = @client.open_point_in_time(index: @index, keep_alive: @scroll)
132
+ r['id']
133
+ end
134
+
135
+ def search_options(pit_id: , search_after: nil, slice_id: nil)
136
+ body = @query.merge({
137
+ :pit => {
138
+ :id => pit_id,
139
+ :keep_alive => @scroll
140
+ }
141
+ })
142
+
143
+ # search_after requires at least a sort field explicitly
144
+ # we add default sort "_shard_doc": "asc" if the query doesn't have any sort field
145
+ # by default, ES adds the same implicitly on top of the provided "sort"
146
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/paginate-search-results.html#CO201-2
147
+ body = body.merge(:sort => {"_shard_doc": "asc"}) if @query&.dig("sort").nil?
148
+
149
+ body = body.merge(:search_after => search_after) unless search_after.nil?
150
+ body = body.merge(:slice => {:id => slice_id, :max => @slices}) unless slice_id.nil?
151
+ {
152
+ :size => @size,
153
+ :body => body
154
+ }
155
+ end
156
+
157
+ def next_page(pit_id: , search_after: nil, slice_id: nil)
158
+ options = search_options(pit_id: pit_id, search_after: search_after, slice_id: slice_id)
159
+ logger.trace("search options", options)
160
+ @client.search(options)
161
+ end
162
+
163
+ def process_page(output_queue)
164
+ r = yield
165
+ r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) }
166
+
167
+ has_hits = r['hits']['hits'].any?
168
+ search_after = r['hits']['hits'][-1]['sort'] rescue nil
169
+ logger.warn("Query got data but the sort value is empty") if has_hits && search_after.nil?
170
+ [ has_hits, search_after ]
171
+ end
172
+
173
+ def with_pit
174
+ pit_id = retryable(PIT_JOB) { create_pit }
175
+ yield pit_id if pit?(pit_id)
176
+ ensure
177
+ clear(pit_id)
178
+ end
179
+
180
+ def search(output_queue:, slice_id: nil, pit_id:)
181
+ log_details = {}
182
+ log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil?
183
+ logger.info("Query start", log_details)
184
+
185
+ has_hits = true
186
+ search_after = nil
187
+
188
+ while has_hits && !@plugin.stop?
189
+ logger.debug("Query progress", log_details)
190
+ has_hits, search_after = process_page(output_queue) do
191
+ next_page(pit_id: pit_id, search_after: search_after, slice_id: slice_id)
192
+ end
193
+ end
194
+
195
+ logger.info("Query completed", log_details)
196
+ end
197
+
198
+ def retryable_search(output_queue)
199
+ with_pit do |pit_id|
200
+ retryable(SEARCH_AFTER_JOB) do
201
+ search(output_queue: output_queue, pit_id: pit_id)
202
+ end
203
+ end
204
+ end
205
+
206
+ def retryable_slice_search(output_queue)
207
+ with_pit do |pit_id|
208
+ @slices.times.map do |slice_id|
209
+ Thread.new do
210
+ LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
211
+ retryable(SEARCH_AFTER_JOB) do
212
+ search(output_queue: output_queue, slice_id: slice_id, pit_id: pit_id)
213
+ end
214
+ end
215
+ end.map(&:join)
216
+ end
217
+
218
+ logger.trace("#{@slices} slices completed")
219
+ end
220
+
221
+ def clear(pit_id)
222
+ logger.info("Closing point in time (PIT)")
223
+ @client.close_point_in_time(:body => {:id => pit_id} ) if pit?(pit_id)
224
+ rescue => e
225
+ logger.debug("Ignoring close_point_in_time exception", message: e.message, exception: e.class)
226
+ end
227
+ end
228
+
229
+ end
230
+ end
231
+ end
@@ -11,7 +11,6 @@ require 'logstash/plugin_mixins/ca_trusted_fingerprint_support'
11
11
  require "logstash/plugin_mixins/scheduler"
12
12
  require "logstash/plugin_mixins/normalize_config_support"
13
13
  require "base64"
14
- require 'logstash/helpers/loggable_try'
15
14
 
16
15
  require "elasticsearch"
17
16
  require "elasticsearch/transport/transport/http/manticore"
@@ -74,6 +73,8 @@ require_relative "elasticsearch/patches/_elasticsearch_transport_connections_sel
74
73
  #
75
74
  class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
76
75
 
76
+ require 'logstash/inputs/elasticsearch/paginated_search'
77
+
77
78
  include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
78
79
  include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck
79
80
 
@@ -106,6 +107,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
106
107
  # The number of retries to run the query. If the query fails after all retries, it logs an error message.
107
108
  config :retries, :validate => :number, :default => 0
108
109
 
110
+ # Default `auto` will use `search_after` api for Elasticsearch 8 and use `scroll` api for 7
111
+ # Set to `scroll` to fall back to the previous behavior
112
+ config :search_api, :validate => %w[auto search_after scroll], :default => "auto"
113
+
109
114
  # This parameter controls the keepalive time in seconds of the scrolling
110
115
  # request and initiates the scrolling process. The timeout applies per
111
116
  # round trip (i.e. between the previous scroll request, to the next).
@@ -321,93 +326,21 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
321
326
 
322
327
  setup_serverless
323
328
 
329
+ setup_search_api
330
+
324
331
  @client
325
332
  end
326
333
 
327
334
 
328
335
  def run(output_queue)
329
336
  if @schedule
330
- scheduler.cron(@schedule) { do_run(output_queue) }
337
+ scheduler.cron(@schedule) { @paginated_search.do_run(output_queue) }
331
338
  scheduler.join
332
339
  else
333
- do_run(output_queue)
334
- end
335
- end
336
-
337
- private
338
- JOB_NAME = "run query"
339
- def do_run(output_queue)
340
- # if configured to run a single slice, don't bother spinning up threads
341
- if @slices.nil? || @slices <= 1
342
- return retryable(JOB_NAME) do
343
- do_run_slice(output_queue)
344
- end
345
- end
346
-
347
- logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
348
-
349
-
350
- @slices.times.map do |slice_id|
351
- Thread.new do
352
- LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
353
- retryable(JOB_NAME) do
354
- do_run_slice(output_queue, slice_id)
355
- end
356
- end
357
- end.map(&:join)
358
-
359
- logger.trace("#{@slices} slices completed")
360
- end
361
-
362
- def retryable(job_name, &block)
363
- begin
364
- stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
365
- stud_try.try((@retries + 1).times) { yield }
366
- rescue => e
367
- error_details = {:message => e.message, :cause => e.cause}
368
- error_details[:backtrace] = e.backtrace if logger.debug?
369
- logger.error("Tried #{job_name} unsuccessfully", error_details)
340
+ @paginated_search.do_run(output_queue)
370
341
  end
371
342
  end
372
343
 
373
- def do_run_slice(output_queue, slice_id=nil)
374
- slice_query = @base_query
375
- slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
376
-
377
- slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) )
378
-
379
- logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil?
380
-
381
- begin
382
- r = search_request(slice_options)
383
-
384
- r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
385
- logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
386
-
387
- has_hits = r['hits']['hits'].any?
388
- scroll_id = r['_scroll_id']
389
-
390
- while has_hits && scroll_id && !stop?
391
- has_hits, scroll_id = process_next_scroll(output_queue, scroll_id)
392
- logger.debug("Slice progress", slice_id: slice_id, slices: @slices) if logger.debug? && slice_id
393
- end
394
- logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil?
395
- ensure
396
- clear_scroll(scroll_id)
397
- end
398
- end
399
-
400
- ##
401
- # @param output_queue [#<<]
402
- # @param scroll_id [String]: a scroll id to resume
403
- # @return [Array(Boolean,String)]: a tuple representing whether the response
404
- #
405
- def process_next_scroll(output_queue, scroll_id)
406
- r = scroll_request(scroll_id)
407
- r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
408
- [r['hits']['hits'].any?, r['_scroll_id']]
409
- end
410
-
411
344
  def push_hit(hit, output_queue)
412
345
  event = targeted_event_factory.new_event hit['_source']
413
346
  set_docinfo_fields(hit, event) if @docinfo
@@ -433,20 +366,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
433
366
  event.set(@docinfo_target, docinfo_target)
434
367
  end
435
368
 
436
- def clear_scroll(scroll_id)
437
- @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id
438
- rescue => e
439
- # ignore & log any clear_scroll errors
440
- logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class)
441
- end
442
-
443
- def scroll_request(scroll_id)
444
- @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
445
- end
446
-
447
- def search_request(options={})
448
- @client.search(options)
449
- end
369
+ private
450
370
 
451
371
  def hosts_default?(hosts)
452
372
  hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? )
@@ -677,6 +597,18 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
677
597
  raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch"
678
598
  end
679
599
 
600
+ def es_info
601
+ @es_info ||= @client.info
602
+ end
603
+
604
+ def es_version
605
+ @es_version ||= es_info&.dig('version', 'number')
606
+ end
607
+
608
+ def es_major_version
609
+ @es_major_version ||= es_version.split('.').first.to_i
610
+ end
611
+
680
612
  # recreate client with default header when it is serverless
681
613
  # verify the header by sending GET /
682
614
  def setup_serverless
@@ -691,13 +623,35 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
691
623
  end
692
624
 
693
625
  def build_flavor
694
- @build_flavor ||= @client.info&.dig('version', 'build_flavor')
626
+ @build_flavor ||= es_info&.dig('version', 'build_flavor')
695
627
  end
696
628
 
697
629
  def serverless?
698
630
  @is_serverless ||= (build_flavor == BUILD_FLAVOR_SERVERLESS)
699
631
  end
700
632
 
633
+ def setup_search_api
634
+ @resolved_search_api = if @search_api == "auto"
635
+ api = if es_major_version >= 8
636
+ "search_after"
637
+ else
638
+ "scroll"
639
+ end
640
+ logger.info("`search_api => auto` resolved to `#{api}`", :elasticsearch => es_version)
641
+ api
642
+ else
643
+ @search_api
644
+ end
645
+
646
+
647
+ @paginated_search = if @resolved_search_api == "search_after"
648
+ LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self)
649
+ else
650
+ logger.warn("scroll API is no longer recommended for pagination. Consider using search_after instead.") if es_major_version >= 8
651
+ LogStash::Inputs::Elasticsearch::Scroll.new(@client, self)
652
+ end
653
+ end
654
+
701
655
  module URIOrEmptyValidator
702
656
  ##
703
657
  # @override to provide :uri_or_empty validator
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-elasticsearch'
4
- s.version = '4.18.0'
4
+ s.version = '4.19.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Reads query results from an Elasticsearch cluster"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -18,7 +18,8 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
18
18
  let(:plugin) { described_class.new(config) }
19
19
  let(:queue) { Queue.new }
20
20
  let(:build_flavor) { "default" }
21
- let(:cluster_info) { {"version" => {"number" => "7.5.0", "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} }
21
+ let(:es_version) { "7.5.0" }
22
+ let(:cluster_info) { {"version" => {"number" => es_version, "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} }
22
23
 
23
24
  before(:each) do
24
25
  Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method
@@ -102,6 +103,26 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
102
103
  expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
103
104
  end
104
105
  end
106
+
107
+ context "search_api" do
108
+ before(:each) do
109
+ plugin.register
110
+ end
111
+
112
+ context "ES 8" do
113
+ let(:es_version) { "8.10.0" }
114
+ it "resolves `auto` to `search_after`" do
115
+ expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter
116
+ end
117
+ end
118
+
119
+ context "ES 7" do
120
+ let(:es_version) { "7.17.0" }
121
+ it "resolves `auto` to `scroll`" do
122
+ expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::Scroll
123
+ end
124
+ end
125
+ end
105
126
  end
106
127
 
107
128
  it_behaves_like "an interruptible input plugin" do
@@ -244,22 +265,24 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
244
265
 
245
266
  context 'with `slices => 1`' do
246
267
  let(:slices) { 1 }
268
+ before { plugin.register }
269
+
247
270
  it 'runs just one slice' do
248
- expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
271
+ expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil)
249
272
  expect(Thread).to_not receive(:new)
250
273
 
251
- plugin.register
252
274
  plugin.run([])
253
275
  end
254
276
  end
255
277
 
256
278
  context 'without slices directive' do
257
279
  let(:config) { super().tap { |h| h.delete('slices') } }
280
+ before { plugin.register }
281
+
258
282
  it 'runs just one slice' do
259
- expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
283
+ expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil)
260
284
  expect(Thread).to_not receive(:new)
261
285
 
262
- plugin.register
263
286
  plugin.run([])
264
287
  end
265
288
  end
@@ -267,13 +290,14 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
267
290
  2.upto(8) do |slice_count|
268
291
  context "with `slices => #{slice_count}`" do
269
292
  let(:slices) { slice_count }
293
+ before { plugin.register }
294
+
270
295
  it "runs #{slice_count} independent slices" do
271
296
  expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times
272
297
  slice_count.times do |slice_id|
273
- expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), slice_id)
298
+ expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), slice_id)
274
299
  end
275
300
 
276
- plugin.register
277
301
  plugin.run([])
278
302
  end
279
303
  end
@@ -399,8 +423,8 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
399
423
  expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
400
424
  expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1)
401
425
 
402
- synchronize_method!(plugin, :scroll_request)
403
- synchronize_method!(plugin, :search_request)
426
+ synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page)
427
+ synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search)
404
428
  end
405
429
 
406
430
  let(:client) { Elasticsearch::Client.new }
@@ -469,14 +493,14 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
469
493
  expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
470
494
  expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom")
471
495
 
472
- synchronize_method!(plugin, :scroll_request)
473
- synchronize_method!(plugin, :search_request)
496
+ synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page)
497
+ synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search)
474
498
  end
475
499
 
476
500
  let(:client) { Elasticsearch::Client.new }
477
501
 
478
502
  it 'insert event to queue without waiting other slices' do
479
- expect(plugin).to receive(:do_run_slice).twice.and_wrap_original do |m, *args|
503
+ expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).twice.and_wrap_original do |m, *args|
480
504
  q = args[0]
481
505
  slice_id = args[1]
482
506
  if slice_id == 0
@@ -996,7 +1020,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
996
1020
 
997
1021
  it "should properly schedule" do
998
1022
  begin
999
- expect(plugin).to receive(:do_run) {
1023
+ expect(plugin.instance_variable_get(:@paginated_search)).to receive(:do_run) {
1000
1024
  queue << LogStash::Event.new({})
1001
1025
  }.at_least(:twice)
1002
1026
  runner = Thread.start { plugin.run(queue) }
@@ -1013,46 +1037,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
1013
1037
  end
1014
1038
 
1015
1039
  context "retries" do
1016
- let(:mock_response) do
1017
- {
1018
- "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
1019
- "took" => 27,
1020
- "timed_out" => false,
1021
- "_shards" => {
1022
- "total" => 169,
1023
- "successful" => 169,
1024
- "failed" => 0
1025
- },
1026
- "hits" => {
1027
- "total" => 1,
1028
- "max_score" => 1.0,
1029
- "hits" => [ {
1030
- "_index" => "logstash-2014.10.12",
1031
- "_type" => "logs",
1032
- "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
1033
- "_score" => 1.0,
1034
- "_source" => { "message" => ["ohayo"] }
1035
- } ]
1036
- }
1037
- }
1038
- end
1039
-
1040
- let(:mock_scroll_response) do
1041
- {
1042
- "_scroll_id" => "r453Wc1jh0caLJhSDg",
1043
- "hits" => { "hits" => [] }
1044
- }
1045
- end
1046
-
1047
- before(:each) do
1048
- client = Elasticsearch::Client.new
1049
- allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
1050
- allow(client).to receive(:search).with(any_args).and_return(mock_response)
1051
- allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response)
1052
- allow(client).to receive(:clear_scroll).and_return(nil)
1053
- allow(client).to receive(:ping)
1054
- end
1055
-
1040
+ let(:client) { Elasticsearch::Client.new }
1056
1041
  let(:config) do
1057
1042
  {
1058
1043
  "hosts" => ["localhost"],
@@ -1061,29 +1046,98 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
1061
1046
  }
1062
1047
  end
1063
1048
 
1064
- it "retry and log error when all search request fail" do
1065
- expect(plugin.logger).to receive(:error).with(/Tried .* unsuccessfully/,
1066
- hash_including(:message => 'Manticore::UnknownException'))
1067
- expect(plugin.logger).to receive(:warn).twice.with(/Attempt to .* but failed/,
1068
- hash_including(:exception => "Manticore::UnknownException"))
1069
- expect(plugin).to receive(:search_request).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice)
1049
+ shared_examples "a retryable plugin" do
1050
+ it "retry and log error when all search request fail" do
1051
+ expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), instance_of(Integer), instance_of(String)).twice
1052
+ expect(client).to receive(:search).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice)
1070
1053
 
1071
- plugin.register
1054
+ plugin.register
1055
+
1056
+ expect{ plugin.run(queue) }.not_to raise_error
1057
+ end
1058
+
1059
+ it "retry successfully when search request fail for one time" do
1060
+ expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), 1, instance_of(String))
1061
+ expect(client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException)
1062
+ expect(client).to receive(:search).with(instance_of(Hash)).once.and_return(search_response)
1072
1063
 
1073
- expect{ plugin.run(queue) }.not_to raise_error
1074
- expect(queue.size).to eq(0)
1064
+ plugin.register
1065
+
1066
+ expect{ plugin.run(queue) }.not_to raise_error
1067
+ end
1075
1068
  end
1076
1069
 
1077
- it "retry successfully when search request fail for one time" do
1078
- expect(plugin.logger).to receive(:warn).once.with(/Attempt to .* but failed/,
1079
- hash_including(:exception => "Manticore::UnknownException"))
1080
- expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException)
1081
- expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_call_original
1070
+ describe "scroll" do
1071
+ let(:search_response) do
1072
+ {
1073
+ "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
1074
+ "took" => 27,
1075
+ "timed_out" => false,
1076
+ "_shards" => {
1077
+ "total" => 169,
1078
+ "successful" => 169,
1079
+ "failed" => 0
1080
+ },
1081
+ "hits" => {
1082
+ "total" => 1,
1083
+ "max_score" => 1.0,
1084
+ "hits" => [ {
1085
+ "_index" => "logstash-2014.10.12",
1086
+ "_type" => "logs",
1087
+ "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
1088
+ "_score" => 1.0,
1089
+ "_source" => { "message" => ["ohayo"] }
1090
+ } ]
1091
+ }
1092
+ }
1093
+ end
1082
1094
 
1083
- plugin.register
1095
+ let(:empty_scroll_response) do
1096
+ {
1097
+ "_scroll_id" => "r453Wc1jh0caLJhSDg",
1098
+ "hits" => { "hits" => [] }
1099
+ }
1100
+ end
1101
+
1102
+ before(:each) do
1103
+ allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
1104
+ allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(empty_scroll_response)
1105
+ allow(client).to receive(:clear_scroll).and_return(nil)
1106
+ allow(client).to receive(:ping)
1107
+ end
1108
+
1109
+ it_behaves_like "a retryable plugin"
1110
+ end
1111
+
1112
+ describe "search_after" do
1113
+ let(:es_version) { "8.10.0" }
1114
+ let(:config) { super().merge({ "search_api" => "search_after" }) }
1115
+
1116
+ let(:search_response) do
1117
+ {
1118
+ "took" => 27,
1119
+ "timed_out" => false,
1120
+ "_shards" => {
1121
+ "total" => 169,
1122
+ "successful" => 169,
1123
+ "failed" => 0
1124
+ },
1125
+ "hits" => {
1126
+ "total" => 1,
1127
+ "max_score" => 1.0,
1128
+ "hits" => [ ] # empty hits to break the loop
1129
+ }
1130
+ }
1131
+ end
1132
+
1133
+ before(:each) do
1134
+ expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
1135
+ expect(client).to receive(:open_point_in_time).once.and_return({ "id" => "cXVlcnlUaGVuRmV0Y2g"})
1136
+ expect(client).to receive(:close_point_in_time).once.and_return(nil)
1137
+ expect(client).to receive(:ping)
1138
+ end
1084
1139
 
1085
- expect{ plugin.run(queue) }.not_to raise_error
1086
- expect(queue.size).to eq(1)
1140
+ it_behaves_like "a retryable plugin"
1087
1141
  end
1088
1142
  end
1089
1143
 
@@ -20,10 +20,7 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
20
20
  let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' }
21
21
  let(:ca_file) { "spec/fixtures/test_certs/ca.crt" }
22
22
 
23
- let(:es_url) do
24
- es_url = ESHelper.get_host_port
25
- SECURE_INTEGRATION ? "https://#{es_url}" : "http://#{es_url}"
26
- end
23
+ let(:es_url) { "http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}" }
27
24
 
28
25
  let(:curl_args) do
29
26
  config['user'] ? "-u #{config['user']}:#{config['password']}" : ''
@@ -46,6 +43,8 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
46
43
  ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args
47
44
  # This can fail if there are no indexes, ignore failure.
48
45
  ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil
46
+ ESHelper.curl_and_get_json_response( "#{es_url}/logs", method: 'DELETE', args: curl_args) rescue nil
47
+ ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args
49
48
  end
50
49
 
51
50
  shared_examples 'an elasticsearch index plugin' do
@@ -56,6 +55,7 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
56
55
  it 'should retrieve json event from elasticsearch' do
57
56
  queue = []
58
57
  plugin.run(queue)
58
+ expect(queue.size).to eq(10)
59
59
  event = queue.pop
60
60
  expect(event).to be_a(LogStash::Event)
61
61
  expect(event.get("response")).to eql(404)
@@ -63,10 +63,6 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
63
63
  end
64
64
 
65
65
  describe 'against an unsecured elasticsearch', integration: true do
66
- before(:each) do
67
- plugin.register
68
- end
69
-
70
66
  it_behaves_like 'an elasticsearch index plugin'
71
67
  end
72
68
 
@@ -136,4 +132,10 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
136
132
 
137
133
  end
138
134
 
135
+ describe 'slice', integration: true do
136
+ let(:config) { super().merge('slices' => 2, 'size' => 2) }
137
+ let(:plugin) { described_class.new(config) }
138
+
139
+ it_behaves_like 'an elasticsearch index plugin'
140
+ end
139
141
  end
@@ -0,0 +1,129 @@
1
+ require "logstash/devutils/rspec/spec_helper"
2
+ require "logstash/inputs/elasticsearch/paginated_search"
3
+
4
+ describe "Paginated search" do
5
+ let(:es_client) { double("Elasticsearch::Client") }
6
+ let(:settings) { { "index" => "logs", "query" => "{ \"sort\": [ \"_doc\" ] }", "scroll" => "1m", "retries" => 0, "size" => 1000 } }
7
+ let(:plugin) { double("LogStash::Inputs::Elasticsearch", params: settings, pipeline_id: "main", stop?: false) }
8
+ let(:pit_id) { "08fsAwILcmVzaGFyZC0yZmIWdzFnbl" }
9
+
10
+ describe "search after" do
11
+ subject do
12
+ LogStash::Inputs::Elasticsearch::SearchAfter.new(es_client, plugin)
13
+ end
14
+
15
+ describe "search options" do
16
+ context "query without sort" do
17
+ let(:settings) { super().merge({"query" => "{\"match_all\": {} }"}) }
18
+
19
+ it "adds default sort" do
20
+ options = subject.search_options(pit_id: pit_id)
21
+ expect(options[:body][:sort]).to match({"_shard_doc": "asc"})
22
+ end
23
+ end
24
+
25
+ context "customize settings" do
26
+ let(:size) { 2 }
27
+ let(:slices) { 4 }
28
+ let(:settings) { super().merge({"slices" => slices, "size" => size}) }
29
+
30
+ it "gives updated options" do
31
+ slice_id = 1
32
+ search_after = [0, 0]
33
+ options = subject.search_options(pit_id: pit_id, slice_id: slice_id, search_after: search_after)
34
+ expect(options[:size]).to match(size)
35
+ expect(options[:body][:slice]).to match({:id => slice_id, :max => slices})
36
+ expect(options[:body][:search_after]).to match(search_after)
37
+ end
38
+ end
39
+ end
40
+
41
+ describe "search" do
42
+ let(:queue) { double("queue") }
43
+ let(:doc1) do
44
+ {
45
+ "_index" => "logstash",
46
+ "_type" => "logs",
47
+ "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
48
+ "_score" => 1.0,
49
+ "_source" => { "message" => ["Halloween"] },
50
+ "sort" => [0, 0]
51
+ }
52
+ end
53
+ let(:first_resp) do
54
+ {
55
+ "pit_id" => pit_id,
56
+ "took" => 27,
57
+ "timed_out" => false,
58
+ "_shards" => {
59
+ "total" => 2,
60
+ "successful" => 2,
61
+ "skipped" => 0,
62
+ "failed" => 0
63
+ },
64
+ "hits" => {
65
+ "total" => {
66
+ "value" => 500,
67
+ "relation" => "eq"
68
+ },
69
+ "hits" => [ doc1 ]
70
+ }
71
+ }
72
+ end
73
+ let(:last_resp) do
74
+ {
75
+ "pit_id" => pit_id,
76
+ "took" => 27,
77
+ "timed_out" => false,
78
+ "_shards" => {
79
+ "total" => 2,
80
+ "successful" => 2,
81
+ "skipped" => 0,
82
+ "failed" => 0
83
+ },
84
+ "hits" => {
85
+ "total" => {
86
+ "value" => 500,
87
+ "relation" => "eq"
88
+ },
89
+ "hits" => [ ] # empty hits to break the loop
90
+ }
91
+ }
92
+ end
93
+
94
+ context "happy case" do
95
+ it "runs" do
96
+ expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp)
97
+ expect(plugin).to receive(:push_hit).with(doc1, queue).once
98
+ subject.search(output_queue: queue, pit_id: pit_id)
99
+ end
100
+ end
101
+
102
+ context "with exception" do
103
+ it "closes pit" do
104
+ expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id})
105
+ expect(plugin).to receive(:push_hit).with(doc1, queue).once
106
+ expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_return(first_resp)
107
+ expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException)
108
+ expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil)
109
+ subject.retryable_search(queue)
110
+ end
111
+ end
112
+
113
+ context "with slices" do
114
+ let(:slices) { 2 }
115
+ let(:settings) { super().merge({"slices" => slices}) }
116
+
117
+ it "runs two slices" do
118
+ expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id})
119
+ expect(plugin).to receive(:push_hit).with(any_args).twice
120
+ expect(Thread).to receive(:new).and_call_original.exactly(slices).times
121
+ expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp, first_resp, last_resp)
122
+ expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil)
123
+ subject.retryable_slice_search(queue)
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.18.0
4
+ version: 4.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-29 00:00:00.000000000 Z
11
+ date: 2023-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -271,6 +271,7 @@ files:
271
271
  - docs/index.asciidoc
272
272
  - lib/logstash/helpers/loggable_try.rb
273
273
  - lib/logstash/inputs/elasticsearch.rb
274
+ - lib/logstash/inputs/elasticsearch/paginated_search.rb
274
275
  - lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb
275
276
  - lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb
276
277
  - logstash-input-elasticsearch.gemspec
@@ -283,6 +284,7 @@ files:
283
284
  - spec/inputs/elasticsearch_spec.rb
284
285
  - spec/inputs/elasticsearch_ssl_spec.rb
285
286
  - spec/inputs/integration/elasticsearch_spec.rb
287
+ - spec/inputs/paginated_search_spec.rb
286
288
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
287
289
  licenses:
288
290
  - Apache License (2.0)
@@ -318,3 +320,4 @@ test_files:
318
320
  - spec/inputs/elasticsearch_spec.rb
319
321
  - spec/inputs/elasticsearch_ssl_spec.rb
320
322
  - spec/inputs/integration/elasticsearch_spec.rb
323
+ - spec/inputs/paginated_search_spec.rb