logstash-output-elasticsearch 8.0.1-java → 8.1.1-java

This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c67a2d242828906ff0f1a9b5d90c09c7db30b7b0
- data.tar.gz: 64da8c0ac68f583f4ba13fbb7e42372aa349e442
+ metadata.gz: fbfa89ded2964b66fde5e29cd3155d8f2da08e03
+ data.tar.gz: 3c603e14218bc92384c4112527d38c834ff7fb3e
  SHA512:
- metadata.gz: f8b439be4170362cdbebf0aff83a98d49a8996826de5a31946d772ba4e1633420fc06f3c0e68d7b4f3f00e62e5dede53b8fd1c7b798a1822973605489058cc8b
- data.tar.gz: 3b204be36010c921c014cfa5b3ed867d4b13ae2d40271437d8b4caad04763003182f23a7e79f4334527df2a8179af8ec2a0478e0fd9bc29be086c558fc9e24c5
+ metadata.gz: aea0668c8ac704c1db41f01f62bdf84a173776b8be7a55e04a6de6898121a3f89a3b94be2c7a94b1d75c17407926bb8c033627dcb1507a1fcbae4d1b04aa16c8
+ data.tar.gz: 23cce8e779627097c33cf6ebb59106f6353cd40fa9d786f67b74fa42655738646104dfdd1a5ddfc27e4609b6187fec3018eb540e0848c5862129662d5686fd59
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ ## 8.1.1
+ - Retry all non-200 responses of the bulk API indefinitely
+ - Improve documentation on retry codes
+
+ ## 8.1.0
+ - Support Elasticsearch 6.x join field type
+ ## 8.0.2
+ - Fix bug where logging errors for bad response codes would raise an unhandled exception
+
  ## 8.0.1
  - Fix some documentation issues

data/docs/index.asciidoc CHANGED
@@ -55,17 +55,18 @@ the new template is installed.

  ==== Retry Policy

- The retry policy has changed significantly in the 2.2.0 release.
+ The retry policy has changed significantly in the 8.1.1 release.
  This plugin uses the Elasticsearch bulk API to optimize its imports into Elasticsearch. These requests may experience
- either partial or total failures.
+ either partial or total failures. The bulk API sends batches of requests to an HTTP endpoint. Error codes for the HTTP
+ request are handled differently than error codes for individual documents.

- The following errors are retried infinitely:
+ HTTP requests to the bulk API are expected to return a 200 response code. All other response codes are retried indefinitely.

- - Network errors (inability to connect)
- - 429 (Too many requests) and
- - 503 (Service unavailable) errors
+ The following document errors are handled as follows:
+ - 400 and 404 errors are sent to the DLQ if enabled. If a DLQ is not enabled a log message will be emitted and the event will be dropped.
+ - 409 errors (conflict) are logged as a warning and dropped.

- NOTE: 409 exceptions are no longer retried. Please set a higher `retry_on_conflict` value if you experience 409 exceptions.
+ Note that 409 exceptions are no longer retried. Please set a higher `retry_on_conflict` value if you experience 409 exceptions.
  It is more performant for Elasticsearch to retry these exceptions than this plugin.

  ==== Batch Sizes ====
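
The note above recommends raising `retry_on_conflict` rather than relying on the plugin to retry 409s. As a minimal sketch (not part of this diff), the option can be set alongside an update action; the host, the `%{id}` field reference, and the value 3 are illustrative placeholders:

```ruby
require "logstash/outputs/elasticsearch"

# Sketch only: let Elasticsearch itself retry version conflicts on partial
# updates instead of surfacing 409 document errors to the plugin.
output = LogStash::Outputs::ElasticSearch.new(
  "hosts"             => ["localhost:9200"],
  "action"            => "update",
  "document_id"       => "%{id}",          # id of the document to update
  "retry_on_conflict" => 3                 # retries performed by Elasticsearch
)
```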
@@ -4,14 +4,10 @@ module LogStash; module Outputs; class ElasticSearch;
  module Common
  attr_reader :client, :hosts

- # These are codes for temporary recoverable conditions
- # 429 just means that ES has too much traffic ATM
- # 503 means it , or a proxy is temporarily unavailable
- RETRYABLE_CODES = [429, 503]
-
- DLQ_CODES = [400, 404]
- SUCCESS_CODES = [200, 201]
- CONFLICT_CODE = 409
+ # These codes apply to documents, not at the request level
+ DOC_DLQ_CODES = [400, 404]
+ DOC_SUCCESS_CODES = [200, 201]
+ DOC_CONFLICT_CODE = 409

  # When you use external versioning, you are communicating that you want
  # to ignore conflicts. More obviously, since an external version is a
@@ -134,12 +130,12 @@ module LogStash; module Outputs; class ElasticSearch;
  # - For 409, we log and drop. there is nothing we can do
  # - For a mapping error, we send to dead letter queue for a human to intervene at a later point.
  # - For everything else there's mastercard. Yep, and we retry indefinitely. This should fix #572 and other transient network issues
- if SUCCESS_CODES.include?(status)
+ if DOC_SUCCESS_CODES.include?(status)
  next
- elsif CONFLICT_CODE == status
+ elsif DOC_CONFLICT_CODE == status
  @logger.warn "Failed action.", status: status, action: action, response: response if !failure_type_logging_whitelist.include?(failure["type"])
  next
- elsif DLQ_CODES.include?(status)
+ elsif DOC_DLQ_CODES.include?(status)
  action_event = action[2]
  # To support bwc, we check if DLQ exists. otherwise we log and drop event (previous behavior)
  if @dlq_writer
@@ -174,8 +170,15 @@ module LogStash; module Outputs; class ElasticSearch;
  params[:pipeline] = event.sprintf(@pipeline)
  end

- if @parent
- params[:parent] = event.sprintf(@parent)
+ if @parent
+ if @join_field
+ join_value = event.get(@join_field)
+ parent_value = event.sprintf(@parent)
+ event.set(@join_field, { "name" => join_value, "parent" => parent_value })
+ params[:_routing] = event.sprintf(@parent)
+ else
+ params[:parent] = event.sprintf(@parent)
+ end
  end

  if @action == 'update'
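
To make the new `join_field` branch above concrete, here is a small sketch (field names are illustrative, not taken from the diff) of how a child event is rewritten before it is sent in the bulk request:

```ruby
require "logstash/event"

# Sketch only: a child event as it might arrive, assuming the output is
# configured with parent => "%{link_to}" and join_field => "my_join".
event = LogStash::Event.new(
  "message" => "Hello World!",
  "link_to" => "test",        # resolves the parent id via %{link_to}
  "my_join" => "child_type"   # current value of the configured join field
)

# The branch added in this diff rewrites the join field into the name/parent
# hash that Elasticsearch 6.x join types expect, and routes the child to the
# parent's shard:
#   event.get("my_join") #=> { "name" => "child_type", "parent" => "test" }
#   params[:_routing]    #=> "test"
```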
@@ -244,29 +247,22 @@ module LogStash; module Outputs; class ElasticSearch;
  sleep_interval = next_sleep_interval(sleep_interval)
  retry unless @stopping.true?
  rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError => e
- if RETRYABLE_CODES.include?(e.response_code)
- log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s}
- log_hash[:body] = e.body if @logger.debug? # Generally this is too verbose
- message = "Encountered a retryable error. Will Retry with exponential backoff "
-
- # We treat 429s as a special case because these really aren't errors, but
- # rather just ES telling us to back off a bit, which we do.
- # The other retryable code is 503, which are true errors
- # Even though we retry the user should be made aware of these
- if e.response_code == 429
- logger.debug(message, log_hash)
- else
- logger.error(message, log_hash)
- end
-
- sleep_interval = sleep_for_interval(sleep_interval)
- retry
+ log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s}
+ log_hash[:body] = e.body if @logger.debug? # Generally this is too verbose
+ message = "Encountered a retryable error. Will Retry with exponential backoff "
+
+ # We treat 429s as a special case because these really aren't errors, but
+ # rather just ES telling us to back off a bit, which we do.
+ # The other retryable code is 503, which are true errors
+ # Even though we retry the user should be made aware of these
+ if e.response_code == 429
+ logger.debug(message, log_hash)
  else
- log_hash = {:code => e.response_code,
- :response_body => e.response_body}
- log_hash[:request_body] = e.request_body if @logger.debug?
- @logger.error("Got a bad response code from server, but this code is not considered retryable. Request will be dropped", log_hash)
+ logger.error(message, log_hash)
  end
+
+ sleep_interval = sleep_for_interval(sleep_interval)
+ retry
  rescue => e
  # Stuff that should never happen
  # For all other errors print out full connection issues
@@ -279,7 +275,6 @@ module LogStash; module Outputs; class ElasticSearch;

  @logger.debug("Failed actions for last bad bulk request!", :actions => actions)

- # We retry until there are no errors! Errors should all go to the retry queue
  sleep_interval = sleep_for_interval(sleep_interval)
  retry unless @stopping.true?
  end
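
The rescue blocks above lean on `sleep_for_interval` and `next_sleep_interval`, which are defined elsewhere in this file and are not touched by the diff. A plausible sketch of the behaviour they provide, assuming the wait doubles per attempt up to the configured `retry_max_interval`:

```ruby
# Sketch under assumptions, not the diff's code: exponential backoff helpers
# as the retry loops above use them. @retry_max_interval is assumed to come
# from the plugin's retry_max_interval option.
def next_sleep_interval(current_interval)
  doubled = 2 * current_interval
  doubled > @retry_max_interval ? @retry_max_interval : doubled
end

def sleep_for_interval(sleep_interval)
  sleep(sleep_interval)               # wait out the current interval
  next_sleep_interval(sleep_interval) # return the interval for the next attempt
end
```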
@@ -78,6 +78,9 @@ module LogStash; module Outputs; class ElasticSearch
  # This can be dynamic using the `%{foo}` syntax.
  mod.config :parent, :validate => :string, :default => nil

+ # For child documents, name of the join field
+ mod.config :join_field, :validate => :string, :default => nil
+
  # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
  # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
  `"127.0.0.1"`
@@ -142,6 +142,13 @@ module LogStash; module Outputs; class ElasticSearch;
  body_stream.truncate(0)
  body_stream.seek(0)
  end
+
+ if response.code != 200
+ raise ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError.new(
+ response.code, @bulk_path, body_stream.to_s, response.body
+ )
+ end
+
  LogStash::Json.load(response.body)
  end

@@ -66,7 +66,7 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;

  request_uri = format_url(url, path)

- resp = @manticore.send(method.downcase, request_uri, params)
+ resp = @manticore.send(method.downcase, request_uri.to_s, params)

  # Manticore returns lazy responses by default
  # We want to block for our usage, this will wait for the repsonse
@@ -106,7 +106,7 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;

  request_uri.path = "#{request_uri.path}/#{parsed_path_and_query.path}".gsub(/\/{2,}/, "/")

- request_uri.to_s
+ request_uri
  end

  def close
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-output-elasticsearch'
- s.version = '8.0.1'
+ s.version = '8.1.1'
  s.licenses = ['apache-2.0']
  s.summary = "Logstash Output to Elasticsearch"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1,7 +1,8 @@
  require_relative "../../../spec/es_spec_helper"
  require "logstash/outputs/elasticsearch"

- shared_examples "a parent indexer" do
+ context "when using elasticsearch 5.x and before", :integration => true, :version_less_than_equal_to_5x => true do
+ shared_examples "a type based parent indexer" do
  let(:index) { 10.times.collect { rand(10).to_s }.join("") }
  let(:type) { 10.times.collect { rand(10).to_s }.join("") }
  let(:event_count) { 10000 + rand(500) }
@@ -15,11 +16,10 @@ shared_examples "a parent indexer" do
  before do
  # Add mapping and a parent document
  index_url = "http://#{get_host_port()}/#{index}"
- ftw = FTW::Agent.new
  mapping = { "mappings" => { "#{type}" => { "_parent" => { "type" => "#{type}_parent" } } } }
- ftw.put!("#{index_url}", {:body => mapping.to_json, :headers => default_headers})
+ Manticore.put("#{index_url}", {:body => mapping.to_json, :headers => default_headers}).call
  pdoc = { "foo" => "bar" }
- ftw.put!("#{index_url}/#{type}_parent/test", {:body => pdoc.to_json, :headers => default_headers})
+ Manticore.put("#{index_url}/#{type}_parent/test", {:body => pdoc.to_json, :headers => default_headers}).call

  subject.register
  subject.multi_receive(event_count.times.map { LogStash::Event.new("link_to" => "test", "message" => "Hello World!", "type" => type) })
@@ -29,43 +29,131 @@ shared_examples "a parent indexer" do
  it "ships events" do
  index_url = "http://#{get_host_port()}/#{index}"

- ftw = FTW::Agent.new
- ftw.post!("#{index_url}/_refresh")
+ Manticore.post("#{index_url}/_refresh").call

  # Wait until all events are available.
  Stud::try(10.times) do
  query = { "query" => { "has_parent" => { "type" => "#{type}_parent", "query" => { "match" => { "foo" => "bar" } } } } }
- data = ""
- response = ftw.post!("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
- response.read_body { |chunk| data << chunk }
+ response = Manticore.post("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
+ data = response.body
  result = LogStash::Json.load(data)
  cur_count = result["count"]
  insist { cur_count } == event_count
  end
  end
- end
+ end

- describe "(http protocol) index events with static parent", :integration => true do
- it_behaves_like 'a parent indexer' do
- let(:parent) { "test" }
- let(:config) {
- {
- "hosts" => get_host_port,
- "index" => index,
- "parent" => parent
+ describe "(http protocol) index events with static parent" do
+ it_behaves_like 'a type based parent indexer' do
+ let(:parent) { "test" }
+ let(:config) {
+ {
+ "hosts" => get_host_port,
+ "index" => index,
+ "parent" => parent
+ }
  }
- }
+ end
  end
- end

- describe "(http_protocol) index events with fieldref in parent value", :integration => true do
- it_behaves_like 'a parent indexer' do
- let(:config) {
- {
- "hosts" => get_host_port,
- "index" => index,
- "parent" => "%{link_to}"
+ describe "(http_protocol) index events with fieldref in parent value" do
+ it_behaves_like 'a type based parent indexer' do
+ let(:config) {
+ {
+ "hosts" => get_host_port,
+ "index" => index,
+ "parent" => "%{link_to}"
+ }
  }
+ end
+ end
+ end
+
+ context "when using elasticsearch 6.x and above", :integration => true, :version_greater_than_equal_to_6x => true do
+
+ shared_examples "a join field based parent indexer" do
+ let(:index) { 10.times.collect { rand(10).to_s }.join("") }
+ let(:type) { 10.times.collect { rand(10).to_s }.join("") }
+ let(:event_count) { 10000 + rand(500) }
+ let(:parent) { "not_implemented" }
+ let(:config) { "not_implemented" }
+ let(:parent_id) { "test" }
+ let(:join_field) { "join_field" }
+ let(:parent_relation) { "parent_type" }
+ let(:child_relation) { "child_type" }
+ let(:default_headers) {
+ {"Content-Type" => "application/json"}
  }
+ subject { LogStash::Outputs::ElasticSearch.new(config) }
+
+ before do
+ # Add mapping and a parent document
+ index_url = "http://#{get_host_port()}/#{index}"
+ mapping = {
+ "mappings" => {
+ type => {
+ "properties" => {
+ join_field => {
+ "type" => "join",
+ "relations" => { parent_relation => child_relation }
+ }
+ }
+ }
+ }
+ }
+ Manticore.put("#{index_url}", {:body => mapping.to_json, :headers => default_headers}).call
+ pdoc = { "message" => "ohayo", join_field => parent_relation }
+ Manticore.put("#{index_url}/#{type}/#{parent_id}", {:body => pdoc.to_json, :headers => default_headers}).call
+
+ subject.register
+ subject.multi_receive(event_count.times.map { LogStash::Event.new("link_to" => parent_id, "message" => "Hello World!", join_field => child_relation) })
+ end
+
+
+ it "ships events" do
+ index_url = "http://#{get_host_port()}/#{index}"
+
+ Manticore.post("#{index_url}/_refresh").call
+
+ # Wait until all events are available.
+ Stud::try(10.times) do
+ query = { "query" => { "has_parent" => { "parent_type" => parent_relation, "query" => { "match_all" => { } } } } }
+ response = Manticore.post("#{index_url}/_count", {:body => query.to_json, :headers => default_headers})
+ data = response.body
+ result = LogStash::Json.load(data)
+ cur_count = result["count"]
+ insist { cur_count } == event_count
+ end
+ end
+ end
+
+ describe "(http protocol) index events with static parent" do
+ it_behaves_like 'a join field based parent indexer' do
+ let(:config) {
+ {
+ "hosts" => get_host_port,
+ "index" => index,
+ "parent" => parent_id,
+ "document_type" => type,
+ "join_field" => join_field,
+ "manage_template" => false
+ }
+ }
+ end
+ end
+
+ describe "(http_protocol) index events with fieldref in parent value" do
+ it_behaves_like 'a join field based parent indexer' do
+ let(:config) {
+ {
+ "hosts" => get_host_port,
+ "index" => index,
+ "parent" => "%{link_to}",
+ "document_type" => type,
+ "join_field" => join_field,
+ "manage_template" => false
+ }
+ }
+ end
  end
  end
@@ -44,19 +44,44 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do
  end
  end

+ describe "bad response codes" do
+ let(:uri) { ::LogStash::Util::SafeURI.new("http://localhost:9200") }
+
+ it "should raise a bad response code error" do
+ resp = double("response")
+ allow(resp).to receive(:call)
+ allow(resp).to receive(:code).and_return(500)
+ allow(resp).to receive(:body).and_return("a body")
+
+ expect(subject.manticore).to receive(:get).
+ with(uri.to_s + "/", anything).
+ and_return(resp)
+
+ uri_with_path = uri.clone
+ uri_with_path.path = "/"
+
+ expect(::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError).to receive(:new).
+ with(resp.code, uri_with_path, nil, resp.body).and_call_original
+
+ expect do
+ subject.perform_request(uri, :get, "/")
+ end.to raise_error(::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError)
+ end
+ end
+
  describe "format_url" do
  let(:url) { ::LogStash::Util::SafeURI.new("http://localhost:9200/path/") }
  let(:path) { "_bulk" }
  subject { described_class.new(double("logger"), {}) }

  it "should add the path argument to the uri's path" do
- expect(java.net.URI.new(subject.format_url(url, path)).path).to eq("/path/_bulk")
+ expect(subject.format_url(url, path).path).to eq("/path/_bulk")
  end

  context "when uri contains query parameters" do
  let(:query_params) { "query=value&key=value2" }
  let(:url) { ::LogStash::Util::SafeURI.new("http://localhost:9200/path/?#{query_params}") }
- let(:formatted) { java.net.URI.new(subject.format_url(url, path))}
+ let(:formatted) { subject.format_url(url, path)}

  it "should retain query_params after format" do
  expect(formatted.query).to eq(query_params)
@@ -73,7 +98,7 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do

  context "when the path contains query parameters" do
  let(:path) { "/special_bulk?pathParam=1"}
- let(:formatted) { java.net.URI.new(subject.format_url(url, path)) }
+ let(:formatted) { subject.format_url(url, path) }

  it "should add the path correctly" do
  expect(formatted.path).to eq("#{url.path}special_bulk")
@@ -86,10 +111,10 @@ describe LogStash::Outputs::ElasticSearch::HttpClient::ManticoreAdapter do

  context "when uri contains credentials" do
  let(:url) { ::LogStash::Util::SafeURI.new("http://myuser:mypass@localhost:9200") }
- let(:formatted) { java.net.URI.new(subject.format_url(url, path)) }
+ let(:formatted) { subject.format_url(url, path) }

  it "should remove credentials after format" do
- expect(formatted.user_info).to be_nil
+ expect(formatted.userinfo).to be_nil
  end
  end
  end
@@ -206,8 +206,10 @@ describe "outputs/elasticsearch" do
  )
  end
  let(:logger) { double("logger").as_null_object }
+ let(:response) { { :errors => [], :items => [] } }

  before(:each) do
+
  i = 0
  bulk_param = [["index", anything, event.to_hash]]

@@ -219,7 +221,7 @@ describe "outputs/elasticsearch" do
  if i == 1
  raise error
  end
- end
+ end.and_return(response)
  eso.multi_receive([event])
  end

metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-elasticsearch
  version: !ruby/object:Gem::Version
- version: 8.0.1
+ version: 8.1.1
  platform: java
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-08-16 00:00:00.000000000 Z
+ date: 2017-08-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement