logstash-input-elasticsearch 4.2.1 → 4.3.0
This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +25 -0
- data/lib/logstash/inputs/elasticsearch.rb +40 -4
- data/logstash-input-elasticsearch.gemspec +1 -1
- data/spec/inputs/elasticsearch_spec.rb +233 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2d8999f6e5261a2aedcf91e63941d0dbd0088af5969c7a132fdb9b2c64100985
+  data.tar.gz: e85ebd29d645319b5f498ce44e6bbea68f752ab480917e20237ff5482fd57492
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: de04e26035cb7a0ab9448f630f6d833894c9c0c74de60f7a781efdf0eebafdb7c54692a31790543ca019932bd84751ff67366bcff78f9a739967c5a8df627452
+  data.tar.gz: 1984940fed8ca921ef26e8e4c30ce3c5c6bda4c0ed681676ca657c92a32e120518f2e4ac58d51a71e6951fde2fcd529a22227f05687517559b65991145217392
data/CHANGELOG.md
CHANGED
data/docs/index.asciidoc
CHANGED
@@ -97,6 +97,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
 | <<plugins-{type}s-{plugin}-schedule>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-scroll>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-size>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-slices>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-ssl>> |<<boolean,boolean>>|No
 | <<plugins-{type}s-{plugin}-user>> |<<string,string>>|No
 |=======================================================================
@@ -250,6 +251,30 @@ round trip (i.e. between the previous scroll request, to the next).
 
 This allows you to set the maximum number of hits returned per scroll.
 
+[id="plugins-{type}s-{plugin}-slices"]
+===== `slices`
+
+* Value type is <<number,number>>
+* There is no default value.
+* Sensible values range from 2 to about 8.
+
+In some cases, it is possible to improve overall throughput by consuming multiple
+distinct slices of a query simultaneously using the
+https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#sliced-scroll[Sliced Scroll API],
+especially if the pipeline is spending significant time waiting on Elasticsearch
+to provide results.
+
+If set, the `slices` parameter tells the plugin how many slices to divide the work
+into, and will produce events from the slices in parallel until all of them are done
+scrolling.
+
+NOTE: The Elasticsearch manual indicates that there can be _negative_ performance
+implications to both the query and the Elasticsearch cluster when a scrolling
+query uses more slices than shards in the index.
+
+If the `slices` parameter is left unset, the plugin will _not_ inject slice
+instructions into the query.
+
 [id="plugins-{type}s-{plugin}-ssl"]
 ===== `ssl`
 
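The new `slices` option documented above is purely additive and opt-in. For context, a minimal pipeline configuration exercising it might look like the following sketch; the host, index, and query values are illustrative placeholders, not taken from this diff:

input {
  elasticsearch {
    hosts   => ["localhost:9200"]
    index   => "logstash-*"
    query   => '{ "query": { "match_all": {} } }'
    slices  => 4        # consume four slices of the scroll in parallel
    scroll  => "5m"
    size    => 1000
    docinfo => true
  }
}

Keeping `slices` at or below the number of shards in the target index avoids the performance penalty called out in the NOTE above.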
data/lib/logstash/inputs/elasticsearch.rb
CHANGED
@@ -1,6 +1,7 @@
 # encoding: utf-8
 require "logstash/inputs/base"
 require "logstash/namespace"
+require "logstash/json"
 require "base64"
 
 # .Compatibility Note
@@ -83,6 +84,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # round trip (i.e. between the previous scroll request, to the next).
   config :scroll, :validate => :string, :default => "1m"
 
+  # This parameter controls the number of parallel slices to be consumed simultaneously
+  # by this pipeline input.
+  config :slices, :validate => :number
+
   # If set, include Elasticsearch document information such as index, type, and
   # the id in the event.
   #
@@ -147,10 +152,14 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 
     @options = {
       :index => @index,
-      :body => @query,
       :scroll => @scroll,
       :size => @size
     }
+    @base_query = LogStash::Json.load(@query)
+    if @slices
+      @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option")
+      @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`")
+    end
 
     transport_options = {}
@@ -196,16 +205,39 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   private
 
   def do_run(output_queue)
-    #
-
+    # if configured to run a single slice, don't bother spinning up threads
+    return do_run_slice(output_queue) if @slices.nil? || @slices <= 1
+
+    logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
+
+    @slices.times.map do |slice_id|
+      Thread.new do
+        LogStash::Util::set_thread_name("#{@id}_slice_#{slice_id}")
+        do_run_slice(output_queue, slice_id)
+      end
+    end.map(&:join)
+  end
+
+  def do_run_slice(output_queue, slice_id=nil)
+    slice_query = @base_query
+    slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
+
+    slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) )
+
+    logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil?
+    r = search_request(slice_options)
 
     r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
+    logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
+
     has_hits = r['hits']['hits'].any?
 
-    while has_hits && !stop?
+    while has_hits && r['_scroll_id'] && !stop?
       r = process_next_scroll(output_queue, r['_scroll_id'])
+      logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
       has_hits = r['has_hits']
     end
+    logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil?
   end
 
   def process_next_scroll(output_queue, scroll_id)
@@ -243,4 +275,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   def scroll_request scroll_id
     @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
   end
+
+  def search_request(options)
+    @client.search(options)
+  end
 end
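The heart of the change is the `do_run`/`do_run_slice` split: when `slices` is set, each worker thread re-sends the user's query with its own `slice` clause merged in, and all workers scroll independently until the run is joined. Here is a standalone Ruby sketch of that fan-out pattern, using plain JSON and threads outside of Logstash; `consume_scroll` is a hypothetical stand-in for the plugin's search/scroll loop:

require 'json'

base_query = JSON.parse('{ "query": { "match_all": {} } }')
slices = 4

workers = slices.times.map do |slice_id|
  Thread.new do
    # Each worker sends the same base query plus its own slice instruction,
    # mirroring do_run_slice's merge of 'slice' => { 'id' => ..., 'max' => ... }.
    body = JSON.dump(base_query.merge('slice' => { 'id' => slice_id, 'max' => slices }))
    # consume_scroll(body) would issue the initial search and then follow the
    # returned _scroll_id until a page comes back without hits or without a
    # scroll id -- the same termination conditions as the plugin's while loop.
  end
end
workers.each(&:join) # like `.map(&:join)` in do_run: block until every slice finishes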
data/logstash-input-elasticsearch.gemspec
CHANGED
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.2.1'
+  s.version = '4.3.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/elasticsearch_spec.rb
CHANGED
@@ -84,6 +84,239 @@ describe LogStash::Inputs::Elasticsearch do
     insist { event.get("message") } == [ "ohayo" ]
   end
 
+
+  # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client
+  # to support sliced scrolling. The underlying implementation will spawn its own threads to consume, so we must be
+  # careful to use thread-safe constructs.
+  context "with managed sliced scrolling" do
+    let(:config) do
+      {
+        'query' => "#{LogStash::Json.dump(query)}",
+        'slices' => slices,
+        'docinfo' => true, # include ids
+      }
+    end
+    let(:query) do
+      {
+        "query" => {
+          "match" => { "city_name" => "Okinawa" }
+        },
+        "fields" => ["message"]
+      }
+    end
+    let(:slices) { 2 }
+
+    context 'with `slices => 0`' do
+      let(:slices) { 0 }
+      it 'fails to register' do
+        expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
+      end
+    end
+
+    context 'with `slices => 1`' do
+      let(:slices) { 1 }
+      it 'runs just one slice' do
+        expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
+        expect(Thread).to_not receive(:new)
+
+        plugin.register
+        plugin.run([])
+      end
+    end
+
+    context 'without slices directive' do
+      let(:config) { super().except('slices') }
+      it 'runs just one slice' do
+        expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
+        expect(Thread).to_not receive(:new)
+
+        plugin.register
+        plugin.run([])
+      end
+    end
+
+    2.upto(8) do |slice_count|
+      context "with `slices => #{slice_count}`" do
+        let(:slices) { slice_count }
+        it "runs #{slice_count} independent slices" do
+          expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times
+          slice_count.times do |slice_id|
+            expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), slice_id)
+          end
+
+          plugin.register
+          plugin.run([])
+        end
+      end
+    end
+
+    # This section of specs heavily mocks the Elasticsearch::Client, and ensures that the Elasticsearch Input Plugin
+    # behaves as expected when handling a series of sliced, scrolled requests/responses.
+    context 'adapter/integration' do
+      let(:response_template) do
+        {
+          "took" => 12,
+          "timed_out" => false,
+          "shards" => {
+            "total" => 6,
+            "successful" => 6,
+            "failed" => 0
+          }
+        }
+      end
+
+      let(:hits_template) do
+        {
+          "total" => 4,
+          "max_score" => 1.0,
+          "hits" => []
+        }
+      end
+
+      let(:hit_template) do
+        {
+          "_index" => "logstash-2018.08.23",
+          "_type" => "logs",
+          "_score" => 1.0,
+          "_source" => { "message" => ["hello, world"] }
+        }
+      end
+
+      # BEGIN SLICE 0: a sequence of THREE scrolled responses containing 2, 1, and 0 items
+      # end-of-slice is reached when slice0_response2 is empty.
+      begin
+        let(:slice0_response0) do
+          response_template.merge({
+            "_scroll_id" => slice0_scroll1,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice0-response0-item0"),
+              hit_template.merge('_id' => "slice0-response0-item1")
+            ])
+          })
+        end
+        let(:slice0_scroll1) { 'slice:0,scroll:1' }
+        let(:slice0_response1) do
+          response_template.merge({
+            "_scroll_id" => slice0_scroll2,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice0-response1-item0")
+            ])
+          })
+        end
+        let(:slice0_scroll2) { 'slice:0,scroll:2' }
+        let(:slice0_response2) do
+          response_template.merge(
+            "_scroll_id" => slice0_scroll3,
+            "hits" => hits_template.merge({"hits" => []})
+          )
+        end
+        let(:slice0_scroll3) { 'slice:0,scroll:3' }
+      end
+      # END SLICE 0
+
+      # BEGIN SLICE 1: a sequence of TWO scrolled responses containing 2 and 2 items.
+      # end-of-slice is reached when slice1_response1 does not contain a next scroll id
+      begin
+        let(:slice1_response0) do
+          response_template.merge({
+            "_scroll_id" => slice1_scroll1,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice1-response0-item0"),
+              hit_template.merge('_id' => "slice1-response0-item1")
+            ])
+          })
+        end
+        let(:slice1_scroll1) { 'slice:1,scroll:1' }
+        let(:slice1_response1) do
+          response_template.merge({
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice1-response1-item0"),
+              hit_template.merge('_id' => "slice1-response1-item1")
+            ])
+          })
+        end
+      end
+      # END SLICE 1
+
+      let(:client) { Elasticsearch::Client.new }
+
+      # RSpec mocks validations are not threadsafe.
+      # Allow caller to synchronize.
+      def synchronize_method!(object, method_name)
+        original_method = object.method(method_name)
+        mutex = Mutex.new
+        allow(object).to receive(method_name).with(any_args) do |*method_args, &method_block|
+          mutex.synchronize do
+            original_method.call(*method_args,&method_block)
+          end
+        end
+      end
+
+      before(:each) do
+        expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
+        plugin.register
+
+        # SLICE0 is a three-page scroll in which the last page is empty
+        slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2}))
+        expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2)
+
+        # SLICE1 is a two-page scroll in which the last page has no next scroll id
+        slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2}))
+        expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1)
+
+        synchronize_method!(plugin, :scroll_request)
+        synchronize_method!(plugin, :search_request)
+      end
+
+      let(:emitted_events) do
+        queue = Queue.new # since we are running slices in threads, we need a thread-safe queue.
+        plugin.run(queue)
+        events = []
+        events << queue.pop until queue.empty?
+        events
+      end
+
+      let(:emitted_event_ids) do
+        emitted_events.map { |event| event.get('[@metadata][_id]') }
+      end
+
+      it 'emits the hits on the first page of the first slice' do
+        expect(emitted_event_ids).to include('slice0-response0-item0')
+        expect(emitted_event_ids).to include('slice0-response0-item1')
+      end
+      it 'emits the hits on the second page of the first slice' do
+        expect(emitted_event_ids).to include('slice0-response1-item0')
+      end
+
+      it 'emits the hits on the first page of the second slice' do
+        expect(emitted_event_ids).to include('slice1-response0-item0')
+        expect(emitted_event_ids).to include('slice1-response0-item1')
+      end
+
+      it 'emits the hits on the second page of the second slice' do
+        expect(emitted_event_ids).to include('slice1-response1-item0')
+        expect(emitted_event_ids).to include('slice1-response1-item1')
+      end
+
+      it 'does not double-emit' do
+        expect(emitted_event_ids.uniq).to eq(emitted_event_ids)
+      end
+
+      it 'emits events with appropriate fields' do
+        emitted_events.each do |event|
+          expect(event).to be_a(LogStash::Event)
+          expect(event.get('message')).to eq(['hello, world'])
+          expect(event.get('[@metadata][_id]')).to_not be_nil
+          expect(event.get('[@metadata][_id]')).to_not be_empty
+          expect(event.get('[@metadata][_index]')).to start_with('logstash-')
+        end
+      end
+    end
+  end
+
   context "with Elasticsearch document information" do
     let!(:response) do
       {
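The `synchronize_method!` helper in the spec above works around RSpec mock verification not being thread-safe by routing every call through a single mutex. The same decorate-with-a-lock pattern is easy to reproduce outside RSpec; a minimal sketch, not taken from the gem:

# Wrap an object's method so concurrent callers are serialized by a mutex.
def synchronize_method!(object, method_name)
  original = object.method(method_name)
  mutex = Mutex.new
  object.define_singleton_method(method_name) do |*args, &block|
    mutex.synchronize { original.call(*args, &block) }
  end
end

counter = Object.new
def counter.bump; @n = (@n || 0) + 1; end
synchronize_method!(counter, :bump)
10.times.map { Thread.new { 100.times { counter.bump } } }.each(&:join)
puts counter.instance_variable_get(:@n) # => 1000; the mutex prevents lost updates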
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-elasticsearch
 version: !ruby/object:Gem::Version
-  version: 4.2.1
+  version: 4.3.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2019-02-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 requirement: !ruby/object:Gem::Requirement
@@ -202,7 +202,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Reads query results from an Elasticsearch cluster