logstash-input-elasticsearch 4.2.1 → 4.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +25 -0
- data/lib/logstash/inputs/elasticsearch.rb +40 -4
- data/logstash-input-elasticsearch.gemspec +1 -1
- data/spec/inputs/elasticsearch_spec.rb +233 -0
- metadata +3 -3
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2d8999f6e5261a2aedcf91e63941d0dbd0088af5969c7a132fdb9b2c64100985
+  data.tar.gz: e85ebd29d645319b5f498ce44e6bbea68f752ab480917e20237ff5482fd57492
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: de04e26035cb7a0ab9448f630f6d833894c9c0c74de60f7a781efdf0eebafdb7c54692a31790543ca019932bd84751ff67366bcff78f9a739967c5a8df627452
+  data.tar.gz: 1984940fed8ca921ef26e8e4c30ce3c5c6bda4c0ed681676ca657c92a32e120518f2e4ac58d51a71e6951fde2fcd529a22227f05687517559b65991145217392
data/CHANGELOG.md
CHANGED
data/docs/index.asciidoc
CHANGED
@@ -97,6 +97,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
 | <<plugins-{type}s-{plugin}-schedule>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-scroll>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-size>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-slices>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-ssl>> |<<boolean,boolean>>|No
 | <<plugins-{type}s-{plugin}-user>> |<<string,string>>|No
 |=======================================================================

@@ -250,6 +251,30 @@ round trip (i.e. between the previous scroll request, to the next).
 
 This allows you to set the maximum number of hits returned per scroll.
 
+[id="plugins-{type}s-{plugin}-slices"]
+===== `slices`
+
+* Value type is <<number,number>>
+* There is no default value.
+* Sensible values range from 2 to about 8.
+
+In some cases, it is possible to improve overall throughput by consuming multiple
+distinct slices of a query simultaneously using the
+https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#sliced-scroll[Sliced Scroll API],
+especially if the pipeline is spending significant time waiting on Elasticsearch
+to provide results.
+
+If set, the `slices` parameter tells the plugin how many slices to divide the work
+into, and will produce events from the slices in parallel until all of them are done
+scrolling.
+
+NOTE: The Elasticsearch manual indicates that there can be _negative_ performance
+implications to both the query and the Elasticsearch cluster when a scrolling
+query uses more slices than shards in the index.
+
+If the `slices` parameter is left unset, the plugin will _not_ inject slice
+instructions into the query.
+
 [id="plugins-{type}s-{plugin}-ssl"]
 ===== `ssl`
 
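To make the injected slice instructions concrete, here is a minimal standalone Ruby sketch (not part of the plugin; the query is a placeholder) of the request bodies a two-slice run would send: each slice repeats the same query with its own `id` and the shared `max`.

```ruby
require 'json'

base_query = { 'query' => { 'match' => { 'city_name' => 'Okinawa' } } } # placeholder query
slices = 2

slices.times do |slice_id|
  # each slice re-sends the same query with its own slice coordinates
  body = base_query.merge('slice' => { 'id' => slice_id, 'max' => slices })
  puts JSON.dump(body)
end
# => {"query":{"match":{"city_name":"Okinawa"}},"slice":{"id":0,"max":2}}
# => {"query":{"match":{"city_name":"Okinawa"}},"slice":{"id":1,"max":2}}
```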
data/lib/logstash/inputs/elasticsearch.rb
CHANGED

@@ -1,6 +1,7 @@
 # encoding: utf-8
 require "logstash/inputs/base"
 require "logstash/namespace"
+require "logstash/json"
 require "base64"
 
 # .Compatibility Note

@@ -83,6 +84,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # round trip (i.e. between the previous scroll request, to the next).
   config :scroll, :validate => :string, :default => "1m"
 
+  # This parameter controls the number of parallel slices to be consumed simultaneously
+  # by this pipeline input.
+  config :slices, :validate => :number
+
   # If set, include Elasticsearch document information such as index, type, and
   # the id in the event.
   #

@@ -147,10 +152,14 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 
     @options = {
       :index => @index,
-      :body => @query,
       :scroll => @scroll,
       :size => @size
     }
+    @base_query = LogStash::Json.load(@query)
+    if @slices
+      @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option")
+      @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`")
+    end
 
     transport_options = {}
 
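As an aside, the `condition && fail(...)` guards above read as one-line preconditions that raise eagerly at register time. A standalone sketch of the same idiom (names invented for the example):

```ruby
# condition && raise(...) fires only when the condition holds; otherwise the
# method falls through, mirroring the two register-time guards above.
def validate_slices!(query, slices)
  query.include?('slice') && raise(ArgumentError, 'query must not pin a specific slice when `slices` is managed')
  slices < 1 && raise(ArgumentError, "slices must be greater than zero, got `#{slices}`")
end

validate_slices!({ 'query' => {} }, 2) # passes silently

begin
  validate_slices!({ 'slice' => { 'id' => 0 } }, 2)
rescue ArgumentError => e
  puts e.message # => query must not pin a specific slice when `slices` is managed
end
```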
@@ -196,16 +205,39 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   private
 
   def do_run(output_queue)
-    #
-
+    # if configured to run a single slice, don't bother spinning up threads
+    return do_run_slice(output_queue) if @slices.nil? || @slices <= 1
+
+    logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8
+
+    @slices.times.map do |slice_id|
+      Thread.new do
+        LogStash::Util::set_thread_name("#{@id}_slice_#{slice_id}")
+        do_run_slice(output_queue, slice_id)
+      end
+    end.map(&:join)
+  end
+
+  def do_run_slice(output_queue, slice_id=nil)
+    slice_query = @base_query
+    slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
+
+    slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) )
+
+    logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil?
+    r = search_request(slice_options)
 
     r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
+    logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
 
     has_hits = r['hits']['hits'].any?
 
-    while has_hits && !stop?
+    while has_hits && r['_scroll_id'] && !stop?
       r = process_next_scroll(output_queue, r['_scroll_id'])
+      logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil?
       has_hits = r['has_hits']
     end
+    logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil?
   end
 
   def process_next_scroll(output_queue, scroll_id)
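The fan-out shape of `do_run` can be seen in isolation below: one thread per slice, each named for debuggability, all joined before the method returns. This is a standalone sketch with invented names (`consume` stands in for `do_run_slice`), not plugin code.

```ruby
def consume(queue, slice_id)
  # stand-in for do_run_slice: emit a couple of items tagged with the slice id
  2.times { |n| queue << "slice#{slice_id}-item#{n}" }
end

queue  = Queue.new # thread-safe, like the Logstash pipeline queue
slices = 3

slices.times.map do |slice_id|
  Thread.new do
    Thread.current.name = "input_slice_#{slice_id}" # aids thread dumps (Ruby >= 2.3)
    consume(queue, slice_id)
  end
end.map(&:join) # block until every slice is done scrolling

puts queue.size # => 6
```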
@@ -243,4 +275,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   def scroll_request scroll_id
     @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
   end
+
+  def search_request(options)
+    @client.search(options)
+  end
 end
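A hedged sketch of how these two helpers drive a complete scroll against the elasticsearch-ruby client, using the same call shapes as above; the host, index, and page size are placeholders:

```ruby
require 'elasticsearch'

client = Elasticsearch::Client.new(hosts: ['localhost:9200']) # placeholder host
r = client.search(index: 'logstash-*', scroll: '1m', size: 1000,
                  body: '{ "query": { "match_all": {} } }')   # like search_request

loop do
  hits = r['hits']['hits']
  break if hits.empty?                 # an empty page ends the slice
  hits.each { |hit| puts hit['_id'] }
  break unless r['_scroll_id']         # so does a response without a scroll id
  r = client.scroll(body: { scroll_id: r['_scroll_id'] }, scroll: '1m') # like scroll_request
end
```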
data/logstash-input-elasticsearch.gemspec
CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.2.1'
+  s.version = '4.3.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/elasticsearch_spec.rb
CHANGED

@@ -84,6 +84,239 @@ describe LogStash::Inputs::Elasticsearch do
     insist { event.get("message") } == [ "ohayo" ]
   end
 
+  # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client
+  # to support sliced scrolling. The underlying implementation will spawn its own threads to consume, so we must be
+  # careful to use thread-safe constructs.
+  context "with managed sliced scrolling" do
+    let(:config) do
+      {
+        'query' => "#{LogStash::Json.dump(query)}",
+        'slices' => slices,
+        'docinfo' => true, # include ids
+      }
+    end
+    let(:query) do
+      {
+        "query" => {
+          "match" => { "city_name" => "Okinawa" }
+        },
+        "fields" => ["message"]
+      }
+    end
+    let(:slices) { 2 }
+
+    context 'with `slices => 0`' do
+      let(:slices) { 0 }
+      it 'fails to register' do
+        expect { plugin.register }.to raise_error(LogStash::ConfigurationError)
+      end
+    end
+
+    context 'with `slices => 1`' do
+      let(:slices) { 1 }
+      it 'runs just one slice' do
+        expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
+        expect(Thread).to_not receive(:new)
+
+        plugin.register
+        plugin.run([])
+      end
+    end
+
+    context 'without slices directive' do
+      let(:config) { super().except('slices') }
+      it 'runs just one slice' do
+        expect(plugin).to receive(:do_run_slice).with(duck_type(:<<))
+        expect(Thread).to_not receive(:new)
+
+        plugin.register
+        plugin.run([])
+      end
+    end
+
+    2.upto(8) do |slice_count|
+      context "with `slices => #{slice_count}`" do
+        let(:slices) { slice_count }
+        it "runs #{slice_count} independent slices" do
+          expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times
+          slice_count.times do |slice_id|
+            expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), slice_id)
+          end
+
+          plugin.register
+          plugin.run([])
+        end
+      end
+    end
+
+    # This section of specs heavily mocks the Elasticsearch::Client, and ensures that the Elasticsearch Input Plugin
+    # behaves as expected when handling a series of sliced, scrolled requests/responses.
+    context 'adapter/integration' do
+      let(:response_template) do
+        {
+          "took" => 12,
+          "timed_out" => false,
+          "shards" => {
+            "total" => 6,
+            "successful" => 6,
+            "failed" => 0
+          }
+        }
+      end
+
+      let(:hits_template) do
+        {
+          "total" => 4,
+          "max_score" => 1.0,
+          "hits" => []
+        }
+      end
+
+      let(:hit_template) do
+        {
+          "_index" => "logstash-2018.08.23",
+          "_type" => "logs",
+          "_score" => 1.0,
+          "_source" => { "message" => ["hello, world"] }
+        }
+      end
+
+      # BEGIN SLICE 0: a sequence of THREE scrolled responses containing 2, 1, and 0 items
+      # end-of-slice is reached when slice0_response2 is empty.
+      begin
+        let(:slice0_response0) do
+          response_template.merge({
+            "_scroll_id" => slice0_scroll1,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice0-response0-item0"),
+              hit_template.merge('_id' => "slice0-response0-item1")
+            ])
+          })
+        end
+        let(:slice0_scroll1) { 'slice:0,scroll:1' }
+        let(:slice0_response1) do
+          response_template.merge({
+            "_scroll_id" => slice0_scroll2,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice0-response1-item0")
+            ])
+          })
+        end
+        let(:slice0_scroll2) { 'slice:0,scroll:2' }
+        let(:slice0_response2) do
+          response_template.merge(
+            "_scroll_id" => slice0_scroll3,
+            "hits" => hits_template.merge({"hits" => []})
+          )
+        end
+        let(:slice0_scroll3) { 'slice:0,scroll:3' }
+      end
+      # END SLICE 0
+
+      # BEGIN SLICE 1: a sequence of TWO scrolled responses containing 2 and 2 items.
+      # end-of-slice is reached when slice1_response1 does not contain a next scroll id
+      begin
+        let(:slice1_response0) do
+          response_template.merge({
+            "_scroll_id" => slice1_scroll1,
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice1-response0-item0"),
+              hit_template.merge('_id' => "slice1-response0-item1")
+            ])
+          })
+        end
+        let(:slice1_scroll1) { 'slice:1,scroll:1' }
+        let(:slice1_response1) do
+          response_template.merge({
+            "hits" => hits_template.merge("hits" => [
+              hit_template.merge('_id' => "slice1-response1-item0"),
+              hit_template.merge('_id' => "slice1-response1-item1")
+            ])
+          })
+        end
+      end
+      # END SLICE 1
+
+      let(:client) { Elasticsearch::Client.new }
+
+      # RSpec mocks validations are not threadsafe.
+      # Allow caller to synchronize.
+      def synchronize_method!(object, method_name)
+        original_method = object.method(method_name)
+        mutex = Mutex.new
+        allow(object).to receive(method_name).with(any_args) do |*method_args, &method_block|
+          mutex.synchronize do
+            original_method.call(*method_args,&method_block)
+          end
+        end
+      end
+
+      before(:each) do
+        expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
+        plugin.register
+
+        # SLICE0 is a three-page scroll in which the last page is empty
+        slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2}))
+        expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2)
+
+        # SLICE1 is a two-page scroll in which the last page has no next scroll id
+        slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2}))
+        expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
+        expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1)
+
+        synchronize_method!(plugin, :scroll_request)
+        synchronize_method!(plugin, :search_request)
+      end
+
+      let(:emitted_events) do
+        queue = Queue.new # since we are running slices in threads, we need a thread-safe queue.
+        plugin.run(queue)
+        events = []
+        events << queue.pop until queue.empty?
+        events
+      end
+
+      let(:emitted_event_ids) do
+        emitted_events.map { |event| event.get('[@metadata][_id]') }
+      end
+
+      it 'emits the hits on the first page of the first slice' do
+        expect(emitted_event_ids).to include('slice0-response0-item0')
+        expect(emitted_event_ids).to include('slice0-response0-item1')
+      end
+      it 'emits the hits on the second page of the first slice' do
+        expect(emitted_event_ids).to include('slice0-response1-item0')
+      end
+
+      it 'emits the hits on the first page of the second slice' do
+        expect(emitted_event_ids).to include('slice1-response0-item0')
+        expect(emitted_event_ids).to include('slice1-response0-item1')
+      end
+
+      it 'emits the hits on the second page of the second slice' do
+        expect(emitted_event_ids).to include('slice1-response1-item0')
+        expect(emitted_event_ids).to include('slice1-response1-item1')
+      end
+
+      it 'does not double-emit' do
+        expect(emitted_event_ids.uniq).to eq(emitted_event_ids)
+      end
+
+      it 'emits events with appropriate fields' do
+        emitted_events.each do |event|
+          expect(event).to be_a(LogStash::Event)
+          expect(event.get('message')).to eq(['hello, world'])
+          expect(event.get('[@metadata][_id]')).to_not be_nil
+          expect(event.get('[@metadata][_id]')).to_not be_empty
+          expect(event.get('[@metadata][_index]')).to start_with('logstash-')
+        end
+      end
+    end
+  end
+
   context "with Elasticsearch document information" do
     let!(:response) do
       {
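The `synchronize_method!` helper above exists because RSpec's message-expectation bookkeeping is mutated on every mocked call, and two slice threads can hit a mocked method at once. The standalone sketch below (invented names, no RSpec) shows the serialization it relies on: a `Mutex` turns a racy read-modify-write into a deterministic one.

```ruby
class CallCounter
  attr_reader :count

  def initialize
    @count = 0
  end

  def record!
    @count += 1 # plain read-modify-write; racy if called from threads unguarded
  end
end

counter = CallCounter.new
mutex   = Mutex.new

4.times.map do
  Thread.new { 1_000.times { mutex.synchronize { counter.record! } } }
end.each(&:join)

puts counter.count # => 4000, deterministic because each record! is serialized
```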
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-elasticsearch
 version: !ruby/object:Gem::Version
-  version: 4.2.1
+  version: 4.3.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2019-02-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement

@@ -202,7 +202,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Reads query results from an Elasticsearch cluster