logstash-input-elasticsearch 4.18.0 → 4.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/index.asciidoc +17 -0
- data/lib/logstash/inputs/elasticsearch/paginated_search.rb +231 -0
- data/lib/logstash/inputs/elasticsearch.rb +46 -92
- data/logstash-input-elasticsearch.gemspec +1 -1
- data/spec/inputs/elasticsearch_spec.rb +124 -70
- data/spec/inputs/integration/elasticsearch_spec.rb +10 -8
- data/spec/inputs/paginated_search_spec.rb +129 -0
- metadata +5 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: beb1b5f12797c3bbedff6d14b755d8c34ba6df8e369f3c82a2c94e8e9dccc68d
+  data.tar.gz: de066785c11786d2ae3d4f47eacbceb14dcd27b80b2f7e1285f59e873479363d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 53883f346badb770e189a1d9a7becbf21cfd1e5c34467b94ad8dc7ab84ea246aa2a96ec6009743f1a1ef1af3beb3cec96d91b5db9ca0a19fc35ba45ec66ba1c8
+  data.tar.gz: b6982521c0d4358a3da4c95eeca1443203b79c4463a9ae21631ba641259a3503b7a665d992c483fe6e695a5ed6b5639502b290f16fdca88fcae5def66311fef0
data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+## 4.19.0
+  - Added `search_api` option to support `search_after` and `scroll` [#198](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/198)
+  - The default value `auto` uses `search_after` for Elasticsearch >= 8; otherwise it falls back to `scroll`
+
 ## 4.18.0
   - Added request header `Elastic-Api-Version` for serverless [#195](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/195)
data/docs/index.asciidoc CHANGED

@@ -118,6 +118,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
 | <<plugins-{type}s-{plugin}-request_timeout_seconds>> | <<number,number>>|No
 | <<plugins-{type}s-{plugin}-schedule>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-scroll>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-search_api>> |<<string,string>>, one of `["auto", "search_after", "scroll"]`|No
 | <<plugins-{type}s-{plugin}-size>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-slices>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-ssl_certificate>> |<<path,path>>|No

@@ -333,6 +334,9 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`.
 The query to be executed. Read the {ref}/query-dsl.html[Elasticsearch query DSL
 documentation] for more information.
 
+When <<plugins-{type}s-{plugin}-search_api>> resolves to `search_after` and the query does not specify `sort`,
+the default sort `'{ "sort": { "_shard_doc": "asc" } }'` is added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] documentation for details.
+
 [id="plugins-{type}s-{plugin}-request_timeout_seconds"]
 ===== `request_timeout_seconds`

@@ -377,6 +381,19 @@ This parameter controls the keepalive time in seconds of the scrolling
 request and initiates the scrolling process. The timeout applies per
 round trip (i.e. between the previous scroll request, to the next).
 
+[id="plugins-{type}s-{plugin}-search_api"]
+===== `search_api`
+
+* Value can be any of: `auto`, `search_after`, `scroll`
+* Default value is `auto`
+
+With `auto`, the plugin uses `search_after` for Elasticsearch version `8.0.0` or higher; otherwise it falls back to the `scroll` API.
+
+`search_after` uses a {ref}/point-in-time-api.html#point-in-time-api[point in time (PIT)] and sort values to paginate.
+The query requires at least one `sort` field, as described in the <<plugins-{type}s-{plugin}-query>> parameter.
+
+`scroll` uses the {ref}/paginate-search-results.html#scroll-search-results[scroll] API to paginate, which is no longer recommended.
+
 [id="plugins-{type}s-{plugin}-size"]
 ===== `size`
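To make the new option concrete, a minimal pipeline configuration exercising it might look like this (the hosts, index, and query values are illustrative placeholders, not taken from the plugin docs above):

    input {
      elasticsearch {
        hosts      => ["localhost:9200"]
        index      => "logs-*"
        query      => '{ "query": { "match_all": {} } }'
        search_api => "auto"   # search_after on Elasticsearch 8+, scroll otherwise
      }
    }

Because this query carries no `sort`, the plugin adds the default `{ "sort": { "_shard_doc": "asc" } }` whenever `search_api` resolves to `search_after`.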
data/lib/logstash/inputs/elasticsearch/paginated_search.rb ADDED

@@ -0,0 +1,231 @@
+require 'logstash/helpers/loggable_try'
+
+module LogStash
+  module Inputs
+    class Elasticsearch
+      class PaginatedSearch
+        include LogStash::Util::Loggable
+
+        def initialize(client, plugin)
+          @client = client
+          @plugin_params = plugin.params
+
+          @index = @plugin_params["index"]
+          @query = LogStash::Json.load(@plugin_params["query"])
+          @scroll = @plugin_params["scroll"]
+          @size = @plugin_params["size"]
+          @slices = @plugin_params["slices"]
+          @retries = @plugin_params["retries"]
+
+          @plugin = plugin
+          @pipeline_id = plugin.pipeline_id
+        end
+
+        def do_run(output_queue)
+          return retryable_search(output_queue) if @slices.nil? || @slices <= 1
+
+          retryable_slice_search(output_queue)
+        end
+
+        def retryable(job_name, &block)
+          stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
+          stud_try.try((@retries + 1).times) { yield }
+        rescue => e
+          error_details = {:message => e.message, :cause => e.cause}
+          error_details[:backtrace] = e.backtrace if logger.debug?
+          logger.error("Tried #{job_name} unsuccessfully", error_details)
+        end
+
+        def retryable_search(output_queue)
+          raise NotImplementedError
+        end
+
+        def retryable_slice_search(output_queue)
+          raise NotImplementedError
+        end
+      end
+
+      class Scroll < PaginatedSearch
+        SCROLL_JOB = "scroll paginated search"
+
+        def search_options(slice_id)
+          query = @query
+          query = @query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
+          {
+            :index => @index,
+            :scroll => @scroll,
+            :size => @size,
+            :body => LogStash::Json.dump(query)
+          }
+        end
+
+        def initial_search(slice_id)
+          options = search_options(slice_id)
+          @client.search(options)
+        end
+
+        def next_page(scroll_id)
+          @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
+        end
+
+        def process_page(output_queue)
+          r = yield
+          r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) }
+          [r['hits']['hits'].any?, r['_scroll_id']]
+        end
+
+        def search(output_queue, slice_id=nil)
+          log_details = {}
+          log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil?
+
+          logger.info("Query start", log_details)
+          has_hits, scroll_id = process_page(output_queue) { initial_search(slice_id) }
+
+          while has_hits && scroll_id && !@plugin.stop?
+            logger.debug("Query progress", log_details)
+            has_hits, scroll_id = process_page(output_queue) { next_page(scroll_id) }
+          end
+
+          logger.info("Query completed", log_details)
+        ensure
+          clear(scroll_id)
+        end
+
+        def retryable_search(output_queue, slice_id=nil)
+          retryable(SCROLL_JOB) do
+            search(output_queue, slice_id)
+          end
+        end
+
+        def retryable_slice_search(output_queue)
+          logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
+
+          @slices.times.map do |slice_id|
+            Thread.new do
+              LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
+              retryable_search(output_queue, slice_id)
+            end
+          end.map(&:join)
+
+          logger.trace("#{@slices} slices completed")
+        end
+
+        def clear(scroll_id)
+          @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id
+        rescue => e
+          # ignore & log any clear_scroll errors
+          logger.debug("Ignoring clear_scroll exception", message: e.message, exception: e.class)
+        end
+      end
+
+      class SearchAfter < PaginatedSearch
+        PIT_JOB = "create point in time (PIT)"
+        SEARCH_AFTER_JOB = "search_after paginated search"
+
+        def pit?(id)
+          !!id&.is_a?(String)
+        end
+
+        def create_pit
+          logger.info("Create point in time (PIT)")
+          r = @client.open_point_in_time(index: @index, keep_alive: @scroll)
+          r['id']
+        end
+
+        def search_options(pit_id: , search_after: nil, slice_id: nil)
+          body = @query.merge({
+            :pit => {
+              :id => pit_id,
+              :keep_alive => @scroll
+            }
+          })
+
+          # search_after requires at least a sort field explicitly
+          # we add default sort "_shard_doc": "asc" if the query doesn't have any sort field
+          # by default, ES adds the same implicitly on top of the provided "sort"
+          # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/paginate-search-results.html#CO201-2
+          body = body.merge(:sort => {"_shard_doc": "asc"}) if @query&.dig("sort").nil?
+
+          body = body.merge(:search_after => search_after) unless search_after.nil?
+          body = body.merge(:slice => {:id => slice_id, :max => @slices}) unless slice_id.nil?
+          {
+            :size => @size,
+            :body => body
+          }
+        end
+
+        def next_page(pit_id: , search_after: nil, slice_id: nil)
+          options = search_options(pit_id: pit_id, search_after: search_after, slice_id: slice_id)
+          logger.trace("search options", options)
+          @client.search(options)
+        end
+
+        def process_page(output_queue)
+          r = yield
+          r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) }
+
+          has_hits = r['hits']['hits'].any?
+          search_after = r['hits']['hits'][-1]['sort'] rescue nil
+          logger.warn("Query got data but the sort value is empty") if has_hits && search_after.nil?
+          [ has_hits, search_after ]
+        end
+
+        def with_pit
+          pit_id = retryable(PIT_JOB) { create_pit }
+          yield pit_id if pit?(pit_id)
+        ensure
+          clear(pit_id)
+        end
+
+        def search(output_queue:, slice_id: nil, pit_id:)
+          log_details = {}
+          log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil?
+          logger.info("Query start", log_details)
+
+          has_hits = true
+          search_after = nil
+
+          while has_hits && !@plugin.stop?
+            logger.debug("Query progress", log_details)
+            has_hits, search_after = process_page(output_queue) do
+              next_page(pit_id: pit_id, search_after: search_after, slice_id: slice_id)
+            end
+          end
+
+          logger.info("Query completed", log_details)
+        end
+
+        def retryable_search(output_queue)
+          with_pit do |pit_id|
+            retryable(SEARCH_AFTER_JOB) do
+              search(output_queue: output_queue, pit_id: pit_id)
+            end
+          end
+        end
+
+        def retryable_slice_search(output_queue)
+          with_pit do |pit_id|
+            @slices.times.map do |slice_id|
+              Thread.new do
+                LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
+                retryable(SEARCH_AFTER_JOB) do
+                  search(output_queue: output_queue, slice_id: slice_id, pit_id: pit_id)
+                end
+              end
+            end.map(&:join)
+          end
+
+          logger.trace("#{@slices} slices completed")
+        end
+
+        def clear(pit_id)
+          logger.info("Closing point in time (PIT)")
+          @client.close_point_in_time(:body => {:id => pit_id} ) if pit?(pit_id)
+        rescue => e
+          logger.debug("Ignoring close_point_in_time exception", message: e.message, exception: e.class)
+        end
+      end
+
+    end
+  end
+end
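Both subclasses implement the same page contract: `process_page` pushes each hit to the queue and returns a pair whose second element seeds the next request — a scroll id for `Scroll`, the last hit's `sort` values for `SearchAfter`. A minimal standalone sketch of the `search_after` variant (`fetch_page` is a hypothetical stand-in for `next_page`):

    # Sketch of the SearchAfter loop; fetch_page stands in for next_page.
    def paginate(fetch_page)
      search_after = nil
      loop do
        hits = fetch_page.call(search_after)  # one page of results
        break if hits.empty?                  # an empty page ends the scan
        hits.each { |hit| puts hit['_id'] }   # stand-in for @plugin.push_hit
        search_after = hits.last['sort']      # sort values seed the next page
      end
    end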
data/lib/logstash/inputs/elasticsearch.rb CHANGED

@@ -11,7 +11,6 @@ require 'logstash/plugin_mixins/ca_trusted_fingerprint_support'
 require "logstash/plugin_mixins/scheduler"
 require "logstash/plugin_mixins/normalize_config_support"
 require "base64"
-require 'logstash/helpers/loggable_try'
 
 require "elasticsearch"
 require "elasticsearch/transport/transport/http/manticore"

@@ -74,6 +73,8 @@ require_relative "elasticsearch/patches/_elasticsearch_transport_connections_sel
 #
 class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 
+  require 'logstash/inputs/elasticsearch/paginated_search'
+
   include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
   include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck

@@ -106,6 +107,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # The number of retries to run the query. If the query fails after all retries, it logs an error message.
   config :retries, :validate => :number, :default => 0
 
+  # Default `auto` will use `search_after` api for Elasticsearch 8 and use `scroll` api for 7
+  # Set to scroll to fallback to previous version
+  config :search_api, :validate => %w[auto search_after scroll], :default => "auto"
+
   # This parameter controls the keepalive time in seconds of the scrolling
   # request and initiates the scrolling process. The timeout applies per
   # round trip (i.e. between the previous scroll request, to the next).

@@ -321,93 +326,21 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 
     setup_serverless
 
+    setup_search_api
+
     @client
   end
 
 
   def run(output_queue)
     if @schedule
-      scheduler.cron(@schedule) { do_run(output_queue) }
+      scheduler.cron(@schedule) { @paginated_search.do_run(output_queue) }
       scheduler.join
     else
-      do_run(output_queue)
-    end
-  end
-
-  private
-  JOB_NAME = "run query"
-  def do_run(output_queue)
-    # if configured to run a single slice, don't bother spinning up threads
-    if @slices.nil? || @slices <= 1
-      return retryable(JOB_NAME) do
-        do_run_slice(output_queue)
-      end
-    end
-
-    logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
-
-
-    @slices.times.map do |slice_id|
-      Thread.new do
-        LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
-        retryable(JOB_NAME) do
-          do_run_slice(output_queue, slice_id)
-        end
-      end
-    end.map(&:join)
-
-    logger.trace("#{@slices} slices completed")
-  end
-
-  def retryable(job_name, &block)
-    begin
-      stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
-      stud_try.try((@retries + 1).times) { yield }
-    rescue => e
-      error_details = {:message => e.message, :cause => e.cause}
-      error_details[:backtrace] = e.backtrace if logger.debug?
-      logger.error("Tried #{job_name} unsuccessfully", error_details)
+      @paginated_search.do_run(output_queue)
     end
   end
 
-  def do_run_slice(output_queue, slice_id=nil)
-    slice_query = @base_query
-    slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
-
-    slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) )
-
-    logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil?
-
-    begin
-      r = search_request(slice_options)
-
-      r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
-      logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
-
-      has_hits = r['hits']['hits'].any?
-      scroll_id = r['_scroll_id']
-
-      while has_hits && scroll_id && !stop?
-        has_hits, scroll_id = process_next_scroll(output_queue, scroll_id)
-        logger.debug("Slice progress", slice_id: slice_id, slices: @slices) if logger.debug? && slice_id
-      end
-      logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil?
-    ensure
-      clear_scroll(scroll_id)
-    end
-  end
-
-  ##
-  # @param output_queue [#<<]
-  # @param scroll_id [String]: a scroll id to resume
-  # @return [Array(Boolean,String)]: a tuple representing whether the response
-  #
-  def process_next_scroll(output_queue, scroll_id)
-    r = scroll_request(scroll_id)
-    r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
-    [r['hits']['hits'].any?, r['_scroll_id']]
-  end
-
   def push_hit(hit, output_queue)
     event = targeted_event_factory.new_event hit['_source']
     set_docinfo_fields(hit, event) if @docinfo

@@ -433,20 +366,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
     event.set(@docinfo_target, docinfo_target)
   end
 
-  def clear_scroll(scroll_id)
-    @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id
-  rescue => e
-    # ignore & log any clear_scroll errors
-    logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class)
-  end
-
-  def scroll_request(scroll_id)
-    @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
-  end
-
-  def search_request(options={})
-    @client.search(options)
-  end
+  private
 
   def hosts_default?(hosts)
     hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? )

@@ -677,6 +597,18 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
     raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch"
   end
 
+  def es_info
+    @es_info ||= @client.info
+  end
+
+  def es_version
+    @es_version ||= es_info&.dig('version', 'number')
+  end
+
+  def es_major_version
+    @es_major_version ||= es_version.split('.').first.to_i
+  end
+
   # recreate client with default header when it is serverless
   # verify the header by sending GET /
   def setup_serverless

@@ -691,13 +623,35 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   end
 
   def build_flavor
-    @build_flavor ||=
+    @build_flavor ||= es_info&.dig('version', 'build_flavor')
   end
 
   def serverless?
     @is_serverless ||= (build_flavor == BUILD_FLAVOR_SERVERLESS)
   end
 
+  def setup_search_api
+    @resolved_search_api = if @search_api == "auto"
+                             api = if es_major_version >= 8
+                                     "search_after"
+                                   else
+                                     "scroll"
+                                   end
+                             logger.info("`search_api => auto` resolved to `#{api}`", :elasticsearch => es_version)
+                             api
+                           else
+                             @search_api
+                           end
+
+
+    @paginated_search = if @resolved_search_api == "search_after"
+                          LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self)
+                        else
+                          logger.warn("scroll API is no longer recommended for pagination. Consider using search_after instead.") if es_major_version >= 8
+                          LogStash::Inputs::Elasticsearch::Scroll.new(@client, self)
+                        end
+  end
+
   module URIOrEmptyValidator
     ##
     # @override to provide :uri_or_empty validator
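Stripped of logging and client wiring, the `auto` resolution in `setup_search_api` reduces to a major-version check; a standalone sketch (the function name is hypothetical):

    # Sketch of the `search_api => auto` resolution performed at register time.
    def resolve_search_api(search_api, es_version)
      return search_api unless search_api == "auto"
      es_version.split('.').first.to_i >= 8 ? "search_after" : "scroll"
    end

    resolve_search_api("auto", "8.10.0")   # => "search_after"
    resolve_search_api("auto", "7.17.0")   # => "scroll"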
data/logstash-input-elasticsearch.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.18.0'
+  s.version = '4.19.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/elasticsearch_spec.rb CHANGED

@@ -18,7 +18,8 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
   let(:plugin) { described_class.new(config) }
   let(:queue) { Queue.new }
   let(:build_flavor) { "default" }
-  let(:
+  let(:es_version) { "7.5.0" }
+  let(:cluster_info) { {"version" => {"number" => es_version, "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} }
 
   before(:each) do
     Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method

@@ -102,6 +103,26 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
       expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
     end
   end
+
+  context "search_api" do
+    before(:each) do
+      plugin.register
+    end
+
+    context "ES 8" do
+      let(:es_version) { "8.10.0" }
+      it "resolves `auto` to `search_after`" do
+        expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter
+      end
+    end
+
+    context "ES 7" do
+      let(:es_version) { "7.17.0" }
+      it "resolves `auto` to `scroll`" do
+        expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::Scroll
+      end
+    end
+  end
 end
 
 it_behaves_like "an interruptible input plugin" do

@@ -244,22 +265,24 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
 
     context 'with `slices => 1`' do
       let(:slices) { 1 }
+      before { plugin.register }
+
       it 'runs just one slice' do
-        expect(plugin).to receive(:
+        expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil)
         expect(Thread).to_not receive(:new)
 
-        plugin.register
         plugin.run([])
       end
     end
 
     context 'without slices directive' do
       let(:config) { super().tap { |h| h.delete('slices') } }
+      before { plugin.register }
+
       it 'runs just one slice' do
-        expect(plugin).to receive(:
+        expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil)
        expect(Thread).to_not receive(:new)
 
-        plugin.register
         plugin.run([])
       end
     end

@@ -267,13 +290,14 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
     2.upto(8) do |slice_count|
       context "with `slices => #{slice_count}`" do
         let(:slices) { slice_count }
+        before { plugin.register }
+
         it "runs #{slice_count} independent slices" do
           expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times
           slice_count.times do |slice_id|
-            expect(plugin).to receive(:
+            expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), slice_id)
           end
 
-          plugin.register
           plugin.run([])
         end
       end

@@ -399,8 +423,8 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
         expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
         expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1)
 
-        synchronize_method!(plugin, :
-        synchronize_method!(plugin, :
+        synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page)
+        synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search)
       end
 
       let(:client) { Elasticsearch::Client.new }

@@ -469,14 +493,14 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
         expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
         expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom")
 
-        synchronize_method!(plugin, :
-        synchronize_method!(plugin, :
+        synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page)
+        synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search)
       end
 
       let(:client) { Elasticsearch::Client.new }
 
       it 'insert event to queue without waiting other slices' do
-        expect(plugin).to receive(:
+        expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).twice.and_wrap_original do |m, *args|
           q = args[0]
           slice_id = args[1]
           if slice_id == 0

@@ -996,7 +1020,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
 
       it "should properly schedule" do
         begin
-          expect(plugin).to receive(:do_run) {
+          expect(plugin.instance_variable_get(:@paginated_search)).to receive(:do_run) {
             queue << LogStash::Event.new({})
           }.at_least(:twice)
           runner = Thread.start { plugin.run(queue) }

@@ -1013,46 +1037,7 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
   end
 
   context "retries" do
-    let(:mock_response) do
-      {
-        "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
-        "took" => 27,
-        "timed_out" => false,
-        "_shards" => {
-          "total" => 169,
-          "successful" => 169,
-          "failed" => 0
-        },
-        "hits" => {
-          "total" => 1,
-          "max_score" => 1.0,
-          "hits" => [ {
-            "_index" => "logstash-2014.10.12",
-            "_type" => "logs",
-            "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
-            "_score" => 1.0,
-            "_source" => { "message" => ["ohayo"] }
-          } ]
-        }
-      }
-    end
-
-    let(:mock_scroll_response) do
-      {
-        "_scroll_id" => "r453Wc1jh0caLJhSDg",
-        "hits" => { "hits" => [] }
-      }
-    end
-
-    before(:each) do
-      client = Elasticsearch::Client.new
-      allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
-      allow(client).to receive(:search).with(any_args).and_return(mock_response)
-      allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response)
-      allow(client).to receive(:clear_scroll).and_return(nil)
-      allow(client).to receive(:ping)
-    end
-
+    let(:client) { Elasticsearch::Client.new }
     let(:config) do
      {
        "hosts" => ["localhost"],

@@ -1061,29 +1046,98 @@ describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do
      }
    end
 
-
-
-
-
-        hash_including(:exception => "Manticore::UnknownException"))
-      expect(plugin).to receive(:search_request).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice)
+    shared_examples "a retryable plugin" do
+      it "retry and log error when all search request fail" do
+        expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), instance_of(Integer), instance_of(String)).twice
+        expect(client).to receive(:search).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice)
 
-
+        plugin.register
+
+        expect{ plugin.run(queue) }.not_to raise_error
+      end
+
+      it "retry successfully when search request fail for one time" do
+        expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), 1, instance_of(String))
+        expect(client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException)
+        expect(client).to receive(:search).with(instance_of(Hash)).once.and_return(search_response)
 
-
-
+        plugin.register
+
+        expect{ plugin.run(queue) }.not_to raise_error
+      end
     end
 
-
-
-
-
-
+    describe "scroll" do
+      let(:search_response) do
+        {
+          "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
+          "took" => 27,
+          "timed_out" => false,
+          "_shards" => {
+            "total" => 169,
+            "successful" => 169,
+            "failed" => 0
+          },
+          "hits" => {
+            "total" => 1,
+            "max_score" => 1.0,
+            "hits" => [ {
+              "_index" => "logstash-2014.10.12",
+              "_type" => "logs",
+              "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
+              "_score" => 1.0,
+              "_source" => { "message" => ["ohayo"] }
+            } ]
+          }
+        }
+      end
 
-
+      let(:empty_scroll_response) do
+        {
+          "_scroll_id" => "r453Wc1jh0caLJhSDg",
+          "hits" => { "hits" => [] }
+        }
+      end
+
+      before(:each) do
+        allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
+        allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(empty_scroll_response)
+        allow(client).to receive(:clear_scroll).and_return(nil)
+        allow(client).to receive(:ping)
+      end
+
+      it_behaves_like "a retryable plugin"
+    end
+
+    describe "search_after" do
+      let(:es_version) { "8.10.0" }
+      let(:config) { super().merge({ "search_api" => "search_after" }) }
+
+      let(:search_response) do
+        {
+          "took" => 27,
+          "timed_out" => false,
+          "_shards" => {
+            "total" => 169,
+            "successful" => 169,
+            "failed" => 0
+          },
+          "hits" => {
+            "total" => 1,
+            "max_score" => 1.0,
+            "hits" => [ ] # empty hits to break the loop
+          }
+        }
+      end
+
+      before(:each) do
+        expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
+        expect(client).to receive(:open_point_in_time).once.and_return({ "id" => "cXVlcnlUaGVuRmV0Y2g"})
+        expect(client).to receive(:close_point_in_time).once.and_return(nil)
+        expect(client).to receive(:ping)
+      end
 
-
-        expect(queue.size).to eq(1)
+      it_behaves_like "a retryable plugin"
     end
   end
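The "a retryable plugin" shared examples above exercise `PaginatedSearch#retryable`, which wraps the job in a Stud-style try: the block runs at most `retries + 1` times, every failed attempt is logged, and the final failure is logged rather than raised. A simplified model of those semantics (not the actual `LoggableTry` implementation):

    # Simplified model of PaginatedSearch#retryable: retries + 1 attempts,
    # each failure logged, the last failure swallowed after logging.
    def retryable(job_name, retries:)
      attempts = 0
      begin
        yield
      rescue => e
        attempts += 1
        warn "#{job_name} failed (attempt #{attempts}): #{e.message}"  # ~ log_failure
        retry if attempts <= retries
      end
    end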
data/spec/inputs/integration/elasticsearch_spec.rb CHANGED

@@ -20,10 +20,7 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
   let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' }
   let(:ca_file) { "spec/fixtures/test_certs/ca.crt" }
 
-  let(:es_url) do
-    es_url = ESHelper.get_host_port
-    SECURE_INTEGRATION ? "https://#{es_url}" : "http://#{es_url}"
-  end
+  let(:es_url) { "http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}" }
 
   let(:curl_args) do
     config['user'] ? "-u #{config['user']}:#{config['password']}" : ''

@@ -46,6 +43,8 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
     ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args
     # This can fail if there are no indexes, ignore failure.
     ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil
+    ESHelper.curl_and_get_json_response( "#{es_url}/logs", method: 'DELETE', args: curl_args) rescue nil
+    ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args
   end
 
   shared_examples 'an elasticsearch index plugin' do

@@ -56,6 +55,7 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
     it 'should retrieve json event from elasticsearch' do
       queue = []
       plugin.run(queue)
+      expect(queue.size).to eq(10)
       event = queue.pop
       expect(event).to be_a(LogStash::Event)
       expect(event.get("response")).to eql(404)

@@ -63,10 +63,6 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
   end
 
   describe 'against an unsecured elasticsearch', integration: true do
-    before(:each) do
-      plugin.register
-    end
-
     it_behaves_like 'an elasticsearch index plugin'
   end
 

@@ -136,4 +132,10 @@ describe LogStash::Inputs::Elasticsearch, :integration => true do
 
   end
 
+  describe 'slice', integration: true do
+    let(:config) { super().merge('slices' => 2, 'size' => 2) }
+    let(:plugin) { described_class.new(config) }
+
+    it_behaves_like 'an elasticsearch index plugin'
+  end
 end
data/spec/inputs/paginated_search_spec.rb ADDED

@@ -0,0 +1,129 @@
+require "logstash/devutils/rspec/spec_helper"
+require "logstash/inputs/elasticsearch/paginated_search"
+
+describe "Paginated search" do
+  let(:es_client) { double("Elasticsearch::Client") }
+  let(:settings) { { "index" => "logs", "query" => "{ \"sort\": [ \"_doc\" ] }", "scroll" => "1m", "retries" => 0, "size" => 1000 } }
+  let(:plugin) { double("LogStash::Inputs::Elasticsearch", params: settings, pipeline_id: "main", stop?: false) }
+  let(:pit_id) { "08fsAwILcmVzaGFyZC0yZmIWdzFnbl" }
+
+  describe "search after" do
+    subject do
+      LogStash::Inputs::Elasticsearch::SearchAfter.new(es_client, plugin)
+    end
+
+    describe "search options" do
+      context "query without sort" do
+        let(:settings) { super().merge({"query" => "{\"match_all\": {} }"}) }
+
+        it "adds default sort" do
+          options = subject.search_options(pit_id: pit_id)
+          expect(options[:body][:sort]).to match({"_shard_doc": "asc"})
+        end
+      end
+
+      context "customize settings" do
+        let(:size) { 2 }
+        let(:slices) { 4 }
+        let(:settings) { super().merge({"slices" => slices, "size" => size}) }
+
+        it "gives updated options" do
+          slice_id = 1
+          search_after = [0, 0]
+          options = subject.search_options(pit_id: pit_id, slice_id: slice_id, search_after: search_after)
+          expect(options[:size]).to match(size)
+          expect(options[:body][:slice]).to match({:id => slice_id, :max => slices})
+          expect(options[:body][:search_after]).to match(search_after)
+        end
+      end
+    end
+
+    describe "search" do
+      let(:queue) { double("queue") }
+      let(:doc1) do
+        {
+          "_index" => "logstash",
+          "_type" => "logs",
+          "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
+          "_score" => 1.0,
+          "_source" => { "message" => ["Halloween"] },
+          "sort" => [0, 0]
+        }
+      end
+      let(:first_resp) do
+        {
+          "pit_id" => pit_id,
+          "took" => 27,
+          "timed_out" => false,
+          "_shards" => {
+            "total" => 2,
+            "successful" => 2,
+            "skipped" => 0,
+            "failed" => 0
+          },
+          "hits" => {
+            "total" => {
+              "value" => 500,
+              "relation" => "eq"
+            },
+            "hits" => [ doc1 ]
+          }
+        }
+      end
+      let(:last_resp) do
+        {
+          "pit_id" => pit_id,
+          "took" => 27,
+          "timed_out" => false,
+          "_shards" => {
+            "total" => 2,
+            "successful" => 2,
+            "skipped" => 0,
+            "failed" => 0
+          },
+          "hits" => {
+            "total" => {
+              "value" => 500,
+              "relation" => "eq"
+            },
+            "hits" => [ ] # empty hits to break the loop
+          }
+        }
+      end
+
+      context "happy case" do
+        it "runs" do
+          expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp)
+          expect(plugin).to receive(:push_hit).with(doc1, queue).once
+          subject.search(output_queue: queue, pit_id: pit_id)
+        end
+      end
+
+      context "with exception" do
+        it "closes pit" do
+          expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id})
+          expect(plugin).to receive(:push_hit).with(doc1, queue).once
+          expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_return(first_resp)
+          expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException)
+          expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil)
+          subject.retryable_search(queue)
+        end
+      end
+
+      context "with slices" do
+        let(:slices) { 2 }
+        let(:settings) { super().merge({"slices" => slices}) }
+
+        it "runs two slices" do
+          expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id})
+          expect(plugin).to receive(:push_hit).with(any_args).twice
+          expect(Thread).to receive(:new).and_call_original.exactly(slices).times
+          expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp, first_resp, last_resp)
+          expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil)
+          subject.retryable_slice_search(queue)
+        end
+      end
+    end
+  end
+
+end
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-elasticsearch
 version: !ruby/object:Gem::Version
-  version: 4.18.0
+  version: 4.19.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-
+date: 2023-11-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement

@@ -271,6 +271,7 @@ files:
 - docs/index.asciidoc
 - lib/logstash/helpers/loggable_try.rb
 - lib/logstash/inputs/elasticsearch.rb
+- lib/logstash/inputs/elasticsearch/paginated_search.rb
 - lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb
 - lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb
 - logstash-input-elasticsearch.gemspec

@@ -283,6 +284,7 @@ files:
 - spec/inputs/elasticsearch_spec.rb
 - spec/inputs/elasticsearch_ssl_spec.rb
 - spec/inputs/integration/elasticsearch_spec.rb
+- spec/inputs/paginated_search_spec.rb
 homepage: http://www.elastic.co/guide/en/logstash/current/index.html
 licenses:
 - Apache License (2.0)

@@ -318,3 +320,4 @@ test_files:
 - spec/inputs/elasticsearch_spec.rb
 - spec/inputs/elasticsearch_ssl_spec.rb
 - spec/inputs/integration/elasticsearch_spec.rb
+- spec/inputs/paginated_search_spec.rb