logstash-output-elasticsearch 8.0.1-java → 8.1.1-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c67a2d242828906ff0f1a9b5d90c09c7db30b7b0
4
- data.tar.gz: 64da8c0ac68f583f4ba13fbb7e42372aa349e442
3
+ metadata.gz: fbfa89ded2964b66fde5e29cd3155d8f2da08e03
4
+ data.tar.gz: 3c603e14218bc92384c4112527d38c834ff7fb3e
5
5
  SHA512:
6
- metadata.gz: f8b439be4170362cdbebf0aff83a98d49a8996826de5a31946d772ba4e1633420fc06f3c0e68d7b4f3f00e62e5dede53b8fd1c7b798a1822973605489058cc8b
7
- data.tar.gz: 3b204be36010c921c014cfa5b3ed867d4b13ae2d40271437d8b4caad04763003182f23a7e79f4334527df2a8179af8ec2a0478e0fd9bc29be086c558fc9e24c5
6
+ metadata.gz: aea0668c8ac704c1db41f01f62bdf84a173776b8be7a55e04a6de6898121a3f89a3b94be2c7a94b1d75c17407926bb8c033627dcb1507a1fcbae4d1b04aa16c8
7
+ data.tar.gz: 23cce8e779627097c33cf6ebb59106f6353cd40fa9d786f67b74fa42655738646104dfdd1a5ddfc27e4609b6187fec3018eb540e0848c5862129662d5686fd59
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 8.1.1
2
+ - Retry all non-200 responses of the bulk API indefinitely
3
+ - Improve documentation on retry codes
4
+
5
+ ## 8.1.0
6
+ - Support Elasticsearch 6.x join field type
7
+ ## 8.0.2
8
+ - Fix bug where logging errors for bad response codes would raise an unhandled exception
9
+
1
10
  ## 8.0.1
2
11
  - Fix some documentation issues
3
12
 
data/docs/index.asciidoc CHANGED
@@ -55,17 +55,18 @@ the new template is installed.
55
55
 
56
56
  ==== Retry Policy
57
57
 
58
- The retry policy has changed significantly in the 2.2.0 release.
58
+ The retry policy has changed significantly in the 8.1.1 release.
59
59
  This plugin uses the Elasticsearch bulk API to optimize its imports into Elasticsearch. These requests may experience
60
- either partial or total failures.
60
+ either partial or total failures. The bulk API sends batches of requests to an HTTP endpoint. Error codes for the HTTP
61
+ request are handled differently than error codes for individual documents.
61
62
 
62
- The following errors are retried infinitely:
63
+ HTTP requests to the bulk API are expected to return a 200 response code. All other response codes are retried indefinitely.
63
64
 
64
- - Network errors (inability to connect)
65
- - 429 (Too many requests) and
66
- - 503 (Service unavailable) errors
65
+ Document-level errors are handled as follows:
66
+ - 400 and 404 errors are sent to the dead letter queue (DLQ), if enabled. If a DLQ is not enabled, a log message will be emitted and the event will be dropped.
67
+ - 409 errors (conflict) are logged as a warning and dropped.
67
68
 
68
- NOTE: 409 exceptions are no longer retried. Please set a higher `retry_on_conflict` value if you experience 409 exceptions.
69
+ Note that 409 exceptions are no longer retried. Please set a higher `retry_on_conflict` value if you experience 409 exceptions.
69
70
  It is more performant for Elasticsearch to retry these exceptions than this plugin.
70
71
 
71
72
  ==== Batch Sizes ====
@@ -4,14 +4,10 @@ module LogStash; module Outputs; class ElasticSearch;
4
4
  module Common
5
5
  attr_reader :client, :hosts
6
6
 
7
- # These are codes for temporary recoverable conditions
8
- # 429 just means that ES has too much traffic ATM
9
- # 503 means it , or a proxy is temporarily unavailable
10
- RETRYABLE_CODES = [429, 503]
11
-
12
- DLQ_CODES = [400, 404]
13
- SUCCESS_CODES = [200, 201]
14
- CONFLICT_CODE = 409
7
+ # These codes apply to documents, not at the request level
8
+ DOC_DLQ_CODES = [400, 404]
9
+ DOC_SUCCESS_CODES = [200, 201]
10
+ DOC_CONFLICT_CODE = 409
15
11
 
16
12
  # When you use external versioning, you are communicating that you want
17
13
  # to ignore conflicts. More obviously, since an external version is a
@@ -134,12 +130,12 @@ module LogStash; module Outputs; class ElasticSearch;
134
130
  # - For 409, we log and drop. there is nothing we can do
135
131
  # - For a mapping error, we send to dead letter queue for a human to intervene at a later point.
136
132
  # - For everything else there's mastercard. Yep, and we retry indefinitely. This should fix #572 and other transient network issues
137
- if SUCCESS_CODES.include?(status)
133
+ if DOC_SUCCESS_CODES.include?(status)
138
134
  next
139
- elsif CONFLICT_CODE == status
135
+ elsif DOC_CONFLICT_CODE == status
140
136
  @logger.warn "Failed action.", status: status, action: action, response: response if !failure_type_logging_whitelist.include?(failure["type"])
141
137
  next
142
- elsif DLQ_CODES.include?(status)
138
+ elsif DOC_DLQ_CODES.include?(status)
143
139
  action_event = action[2]
144
140
  # To support bwc, we check if DLQ exists. otherwise we log and drop event (previous behavior)
145
141
  if @dlq_writer
@@ -174,8 +170,15 @@ module LogStash; module Outputs; class ElasticSearch;
174
170
  params[:pipeline] = event.sprintf(@pipeline)
175
171
  end
176
172
 
177
- if @parent
178
- params[:parent] = event.sprintf(@parent)
173
+ if @parent
174
+ if @join_field
175
+ join_value = event.get(@join_field)
176
+ parent_value = event.sprintf(@parent)
177
+ event.set(@join_field, { "name" => join_value, "parent" => parent_value })
178
+ params[:_routing] = event.sprintf(@parent)
179
+ else
180
+ params[:parent] = event.sprintf(@parent)
181
+ end
179
182
  end
180
183
 
181
184
  if @action == 'update'
@@ -244,29 +247,22 @@ module LogStash; module Outputs; class ElasticSearch;
244
247
  sleep_interval = next_sleep_interval(sleep_interval)
245
248
  retry unless @stopping.true?
246
249
  rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError => e
247
- if RETRYABLE_CODES.include?(e.response_code)
248
- log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s}
249
- log_hash[:body] = e.body if @logger.debug? # Generally this is too verbose
250
- message = "Encountered a retryable error. Will Retry with exponential backoff "
251
-
252
- # We treat 429s as a special case because these really aren't errors, but
253
- # rather just ES telling us to back off a bit, which we do.
254
- # The other retryable code is 503, which are true errors
255
- # Even though we retry the user should be made aware of these
256
- if e.response_code == 429
257
- logger.debug(message, log_hash)
258
- else
259
- logger.error(message, log_hash)
260
- end
261
-
262
- sleep_interval = sleep_for_interval(sleep_interval)
263
- retry
250
+ log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s}
251
+ log_hash[:body] = e.body if @logger.debug? # Generally this is too verbose
252
+ message = "Encountered a retryable error. Will Retry with exponential backoff "
253
+
254
+ # We treat 429s as a special case because these really aren't errors, but
255
+ # rather just ES telling us to back off a bit, which we do.
256
+ # Every other non-200 code is a true error
257
+ # Even though we retry the user should be made aware of these
258
+ if e.response_code == 429
259
+ logger.debug(message, log_hash)
264
260
  else
265
- log_hash = {:code => e.response_code,
266
- :response_body => e.response_body}
267
- log_hash[:request_body] = e.request_body if @logger.debug?
268
- @logger.error("Got a bad response code from server, but this code is not considered retryable. Request will be dropped", log_hash)
261
+ logger.error(message, log_hash)
269
262
  end
263
+
264
+ sleep_interval = sleep_for_interval(sleep_interval)
265
+ retry
270
266
  rescue => e
271
267
  # Stuff that should never happen
272
268
  # For all other errors print out full connection issues
@@ -279,7 +275,6 @@ module LogStash; module Outputs; class ElasticSearch;
279
275
 
280
276
  @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
281
277
 
282
- # We retry until there are no errors! Errors should all go to the retry queue
283
278
  sleep_interval = sleep_for_interval(sleep_interval)
284
279
  retry unless @stopping.true?
285
280
  end
@@ -78,6 +78,9 @@ module LogStash; module Outputs; class ElasticSearch
78
78
  # This can be dynamic using the `%{foo}` syntax.
79
79
  mod.config :parent, :validate => :string, :default => nil
80
80
 
81
+ # For child documents, name of the join field
82
+ mod.config :join_field, :validate => :string, :default => nil
83
+
81
84
  # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
82
85
  # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
83
86
  # `"127.0.0.1"`
@@ -142,6 +142,13 @@ module LogStash; module Outputs; class ElasticSearch;
142
142
  body_stream.truncate(0)
143
143
  body_stream.seek(0)
144
144
  end
145
+
146
+ if response.code != 200
147
+ raise ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError.new(
148
+ response.code, @bulk_path, body_stream.to_s, response.body
149
+ )
150
+ end
151
+
145
152
  LogStash::Json.load(response.body)
146
153
  end
147
154
 
@@ -66,7 +66,7 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
66
66
 
67
67
  request_uri = format_url(url, path)
68
68
 
69
- resp = @manticore.send(method.downcase, request_uri, params)
69
+ resp = @manticore.send(method.downcase, request_uri.to_s, params)
70
70
 
71
71
  # Manticore returns lazy responses by default
72
72
  # We want to block for our usage, this will wait for the response
@@ -106,7 +106,7 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
106
106
 
107
107
  request_uri.path = "#{request_uri.path}/#{parsed_path_and_query.path}".gsub(/\/{2,}/, "/")
108
108
 
109
- request_uri.to_s
109
+ request_uri
110
110
  end
111
111
 
112
112
  def close
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-output-elasticsearch'
3
- s.version = '8.0.1'
3
+ s.version = '8.1.1'
4
4
  s.licenses = ['apache-2.0']
5
5
  s.summary = "Logstash Output to Elasticsearch"
6
6
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1,7 +1,8 @@
1
1
  require_relative "../../../spec/es_spec_helper"
2
2
  require "logstash/outputs/elasticsearch"
3
3
 
4
- shared_examples "a parent indexer" do
4
+ context "when using elasticsearch 5.x and before", :integration => true, :version_less_than_equal_to_5x => true do
5
+ shared_examples "a type based parent indexer" do
5
6
  let(:index) { 10.times.collect { rand(10).to_s }.join("") }
6
7
  let(:type) { 10.times.collect { rand(10).to_s }.join("") }
7
8
  let(:event_count) { 10000 + rand(500) }
@@ -15,11 +16,10 @@ shared_examples "a parent indexer" do
15
16
  before do
16
17
  # Add mapping and a parent document
17
18
  index_url = "http://#{get_host_port()}/#{index}"
18
- ftw = FTW::Agent.new
19
19
  mapping = { "mappings" => { "#{type}" => { "_parent" => { "type" => "#{type}_parent" } } } }
20
- ftw.put!("#{index_url}", {:body => mapping.to_json, :headers => default_headers})
20
+ Manticore.put("#{index_url}", {:body => mapping.to_json, :headers => default_headers}).call
21
21
  pdoc = { "foo" => "bar" }
22
- ftw.put!("#{index_url}/#{type}_parent/test", {:body => pdoc.to_json, :headers => default_headers})
22
+ Manticore.put("#{index_url}/#{type}_parent/test", {:body => pdoc.to_json, :headers => default_headers}).call
23
23
 
24
24
  subject.register
25
25
  subject.multi_receive(event_count.times.map { LogStash::Event.new("link_to" => "test", "message" => "Hello World!", "type" => type) })
@@ -29,43 +29,131 @@ shared_examples "a parent indexer" do
29
29
  it "ships events" do
30
30
  index_url = "http://#{get_host_port()}/#{index}"
31
31
 
32
- ftw = FTW::Agent.new
33
- ftw.post!("#{index_url}/_refresh")
32
+ Manticore.post("#{index_url}/_refresh").call
34
33
 
35
34
  # Wait until all events are available.
36
35
  Stud::try(10.times) do
37
36
  query = { "query" => { "has_parent" => { "type" => "#{type}_parent", "query" => { "match" => { "foo" => "bar" } } } } }
38
- data = ""
39
- response = ftw.post!("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
40
- response.read_body { |chunk| data << chunk }
37
+ response = Manticore.post("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
38
+ data = response.body
41
39
  result = LogStash::Json.load(data)
42
40
  cur_count = result["count"]
43
41
  insist { cur_count } == event_count
44
42
  end
45
43
  end
46
- end
44
+ end
47
45
 
48
- describe "(http protocol) index events with static parent", :integration => true do
49
- it_behaves_like 'a parent indexer' do
50
- let(:parent) { "test" }
51
- let(:config) {
52
- {
53
- "hosts" => get_host_port,
54
- "index" => index,
55
- "parent" => parent
46
+ describe "(http protocol) index events with static parent" do
47
+ it_behaves_like 'a type based parent indexer' do
48
+ let(:parent) { "test" }
49
+ let(:config) {
50
+ {
51
+ "hosts" => get_host_port,
52
+ "index" => index,
53
+ "parent" => parent
54
+ }
56
55
  }
57
- }
56
+ end
58
57
  end
59
- end
60
58
 
61
- describe "(http_protocol) index events with fieldref in parent value", :integration => true do
62
- it_behaves_like 'a parent indexer' do
63
- let(:config) {
64
- {
65
- "hosts" => get_host_port,
66
- "index" => index,
67
- "parent" => "%{link_to}"
59
+ describe "(http_protocol) index events with fieldref in parent value" do
60
+ it_behaves_like 'a type based parent indexer' do
61
+ let(:config) {
62
+ {
63
+ "hosts" => get_host_port,
64
+ "index" => index,
65
+ "parent" => "%{link_to}"
66
+ }
68
67
  }
68
+ end
69
+ end
70
+ end
71
+
72
+ context "when using elasticsearch 6.x and above", :integration => true, :version_greater_than_equal_to_6x => true do
73
+
74
+ shared_examples "a join field based parent indexer" do
75
+ let(:index) { 10.times.collect { rand(10).to_s }.join("") }
76
+ let(:type) { 10.times.collect { rand(10).to_s }.join("") }
77
+ let(:event_count) { 10000 + rand(500) }
78
+ let(:parent) { "not_implemented" }
79
+ let(:config) { "not_implemented" }
80
+ let(:parent_id) { "test" }
81
+ let(:join_field) { "join_field" }
82
+ let(:parent_relation) { "parent_type" }
83
+ let(:child_relation) { "child_type" }
84
+ let(:default_headers) {
85
+ {"Content-Type" => "application/json"}
69
86
  }
87
+ subject { LogStash::Outputs::ElasticSearch.new(config) }
88
+
89
+ before do
90
+ # Add mapping and a parent document
91
+ index_url = "http://#{get_host_port()}/#{index}"
92
+ mapping = {
93
+ "mappings" => {
94
+ type => {
95
+ "properties" => {
96
+ join_field => {
97
+ "type" => "join",
98
+ "relations" => { parent_relation => child_relation }
99
+ }
100
+ }
101
+ }
102
+ }
103
+ }
104
+ Manticore.put("#{index_url}", {:body => mapping.to_json, :headers => default_headers}).call
105
+ pdoc = { "message" => "ohayo", join_field => parent_relation }
106
+ Manticore.put("#{index_url}/#{type}/#{parent_id}", {:body => pdoc.to_json, :headers => default_headers}).call
107
+
108
+ subject.register
109
+ subject.multi_receive(event_count.times.map { LogStash::Event.new("link_to" => parent_id, "message" => "Hello World!", join_field => child_relation) })
110
+ end
111
+
112
+
113
+ it "ships events" do
114
+ index_url = "http://#{get_host_port()}/#{index}"
115
+
116
+ Manticore.post("#{index_url}/_refresh").call
117
+
118
+ # Wait until all events are available.
119
+ Stud::try(10.times) do
120
+ query = { "query" => { "has_parent" => { "parent_type" => parent_relation, "query" => { "match_all" => { } } } } }
121
+ response = Manticore.post("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
122
+ data = response.body
123
+ result = LogStash::Json.load(data)
124
+ cur_count = result["count"]
125
+ insist { cur_count } == event_count
126
+ end
127
+ end
128
+ end
129
+
130
+ describe "(http protocol) index events with static parent" do
131
+ it_behaves_like 'a join field based parent indexer' do
132
+ let(:config) {
133
+ {
134
+ "hosts" => get_host_port,
135
+ "index" => index,
136
+ "parent" => parent_id,
137
+ "document_type" => type,
138
+ "join_field" => join_field,
139
+ "manage_template" => false
140
+ }
141
+ }
142
+ end
143
+ end
144
+
145
+ describe "(http_protocol) index events with fieldref in parent value" do
146
+ it_behaves_like 'a join field based parent indexer' do
147
+ let(:config) {
148
+ {
149
+ "hosts" => get_host_port,
150
+ "index" => index,
151
+ "parent" => "%{link_to}",
152
+ "document_type" => type,
153
+ "join_field" => join_field,
154
+ "manage_template" => false
155
+ }
156
+ }
157
+ end
70
158
  end
71
159
  end
@@ -44,19 +44,44 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do
44
44
  end
45
45
  end
46
46
 
47
+ describe "bad response codes" do
48
+ let(:uri) { ::LogStash::Util::SafeURI.new("http://localhost:9200") }
49
+
50
+ it "should raise a bad response code error" do
51
+ resp = double("response")
52
+ allow(resp).to receive(:call)
53
+ allow(resp).to receive(:code).and_return(500)
54
+ allow(resp).to receive(:body).and_return("a body")
55
+
56
+ expect(subject.manticore).to receive(:get).
57
+ with(uri.to_s + "/", anything).
58
+ and_return(resp)
59
+
60
+ uri_with_path = uri.clone
61
+ uri_with_path.path = "/"
62
+
63
+ expect(::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError).to receive(:new).
64
+ with(resp.code, uri_with_path, nil, resp.body).and_call_original
65
+
66
+ expect do
67
+ subject.perform_request(uri, :get, "/")
68
+ end.to raise_error(::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError)
69
+ end
70
+ end
71
+
47
72
  describe "format_url" do
48
73
  let(:url) { ::LogStash::Util::SafeURI.new("http://localhost:9200/path/") }
49
74
  let(:path) { "_bulk" }
50
75
  subject { described_class.new(double("logger"), {}) }
51
76
 
52
77
  it "should add the path argument to the uri's path" do
53
- expect(java.net.URI.new(subject.format_url(url, path)).path).to eq("/path/_bulk")
78
+ expect(subject.format_url(url, path).path).to eq("/path/_bulk")
54
79
  end
55
80
 
56
81
  context "when uri contains query parameters" do
57
82
  let(:query_params) { "query=value&key=value2" }
58
83
  let(:url) { ::LogStash::Util::SafeURI.new("http://localhost:9200/path/?#{query_params}") }
59
- let(:formatted) { java.net.URI.new(subject.format_url(url, path))}
84
+ let(:formatted) { subject.format_url(url, path)}
60
85
 
61
86
  it "should retain query_params after format" do
62
87
  expect(formatted.query).to eq(query_params)
@@ -73,7 +98,7 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do
73
98
 
74
99
  context "when the path contains query parameters" do
75
100
  let(:path) { "/special_bulk?pathParam=1"}
76
- let(:formatted) { java.net.URI.new(subject.format_url(url, path)) }
101
+ let(:formatted) { subject.format_url(url, path) }
77
102
 
78
103
  it "should add the path correctly" do
79
104
  expect(formatted.path).to eq("#{url.path}special_bulk")
@@ -86,10 +111,10 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do
86
111
 
87
112
  context "when uri contains credentials" do
88
113
  let(:url) { ::LogStash::Util::SafeURI.new("http://myuser:mypass@localhost:9200") }
89
- let(:formatted) { java.net.URI.new(subject.format_url(url, path)) }
114
+ let(:formatted) { subject.format_url(url, path) }
90
115
 
91
116
  it "should remove credentials after format" do
92
- expect(formatted.user_info).to be_nil
117
+ expect(formatted.userinfo).to be_nil
93
118
  end
94
119
  end
95
120
  end
@@ -206,8 +206,10 @@ describe "outputs/elasticsearch" do
206
206
  )
207
207
  end
208
208
  let(:logger) { double("logger").as_null_object }
209
+ let(:response) { { :errors => [], :items => [] } }
209
210
 
210
211
  before(:each) do
212
+
211
213
  i = 0
212
214
  bulk_param = [["index", anything, event.to_hash]]
213
215
 
@@ -219,7 +221,7 @@ describe "outputs/elasticsearch" do
219
221
  if i == 1
220
222
  raise error
221
223
  end
222
- end
224
+ end.and_return(response)
223
225
  eso.multi_receive([event])
224
226
  end
225
227
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.0.1
4
+ version: 8.1.1
5
5
  platform: java
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-16 00:00:00.000000000 Z
11
+ date: 2017-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement