logstash-output-elasticsearch 3.0.2-java → 4.1.0-java
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -3
- data/Gemfile +1 -1
- data/lib/logstash/outputs/elasticsearch/common.rb +90 -58
- data/lib/logstash/outputs/elasticsearch/common_configs.rb +12 -32
- data/lib/logstash/outputs/elasticsearch/http_client/manticore_adapter.rb +63 -0
- data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +378 -0
- data/lib/logstash/outputs/elasticsearch/http_client.rb +70 -64
- data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +15 -4
- data/lib/logstash/outputs/elasticsearch/template_manager.rb +1 -1
- data/lib/logstash/outputs/elasticsearch.rb +27 -4
- data/logstash-output-elasticsearch.gemspec +3 -5
- data/spec/es_spec_helper.rb +1 -0
- data/spec/fixtures/5x_node_resp.json +2 -0
- data/spec/integration/outputs/create_spec.rb +2 -5
- data/spec/integration/outputs/index_spec.rb +1 -1
- data/spec/integration/outputs/parent_spec.rb +1 -3
- data/spec/integration/outputs/pipeline_spec.rb +1 -2
- data/spec/integration/outputs/retry_spec.rb +51 -49
- data/spec/integration/outputs/routing_spec.rb +1 -1
- data/spec/integration/outputs/secure_spec.rb +4 -8
- data/spec/integration/outputs/templates_spec.rb +12 -8
- data/spec/integration/outputs/update_spec.rb +13 -27
- data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +25 -0
- data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +142 -0
- data/spec/unit/outputs/elasticsearch/http_client_spec.rb +8 -22
- data/spec/unit/outputs/elasticsearch_proxy_spec.rb +5 -6
- data/spec/unit/outputs/elasticsearch_spec.rb +33 -30
- data/spec/unit/outputs/elasticsearch_ssl_spec.rb +10 -6
- metadata +72 -87
- data/lib/logstash/outputs/elasticsearch/buffer.rb +0 -124
- data/spec/unit/buffer_spec.rb +0 -118
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1463814b1c058872439a7cff351407b2c2a6d442
+  data.tar.gz: 28b8279a5cf3bb64e2003bc7e5272989918a2eb7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7d7ac9a08e59b1698121a9aaf13b195a032961a3df092c7a58ed183611869efe95803928e5dce25fe8b8e9a0394a1eae7e15cb4b2fa4b7b44a0bee1362321bdb
+  data.tar.gz: a97ecd3ea4a6d391b7b7e77d38956d9c7c8089ca5406513301a2e46e5f1e12fb82edd4f0a0dce048a473e903965012246c8b99978fb207eae384fb35a4887238
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,25 @@
+## 4.1.0
+- breaking,config: Removed obsolete config `host` and `port`. Please use the `hosts` config with the `[host:port]` syntax.
+- breaking,config: Removed obsolete config `index_type`. Please use `document_type` instead.
+- breaking,config: Set config `max_retries` and `retry_max_items` as obsolete
+
+## 4.0.0
+- Make this plugin threadsafe. Workers no longer needed or supported
+- Add pool_max and pool_max_per_route options
+
 ## 3.0.2
-
+- Fix issues where URI based paths in 'hosts' would not function correctly
+
 ## 3.0.1
-
+- Republish all the gems under jruby.
+
 ## 3.0.0
-
+- Update the plugin to the version 2.0 of the plugin api, this change is required for Logstash 5.0 compatibility. See https://github.com/elastic/logstash/issues/5141
+
 ## 2.7.0
 - Add `pipeline` configuration option for setting an ingest pipeline to run upon indexing
 
+
 ## 2.6.2
 - Fix bug where update index actions would not work with events with 'data' field
 
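The `breaking,config` items above are the main upgrade hazard: `host`, `port`, and `index_type` are now `:obsolete`, so configs that still set them fail validation instead of merely warning. A minimal sketch of the migration, written the way this plugin's own specs construct outputs; the `es1`/`es2` hostnames are placeholders and the snippet assumes a working Logstash runtime:

    require "logstash/outputs/elasticsearch"

    # Pre-4.1.0 style, now rejected at config validation time:
    #   LogStash::Outputs::ElasticSearch.new("host" => "es1", "port" => 9200)

    # 4.1.0 style: a single `hosts` array, entries optionally in 'host:port' form.
    output = LogStash::Outputs::ElasticSearch.new(
      "hosts"         => ["es1:9200", "es2:9200"],
      "document_type" => "%{type}"   # replaces the removed `index_type`
    )
    output.register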
data/lib/logstash/outputs/elasticsearch/common.rb
CHANGED
@@ -1,5 +1,4 @@
 require "logstash/outputs/elasticsearch/template_manager"
-require "logstash/outputs/elasticsearch/buffer"
 
 module LogStash; module Outputs; class ElasticSearch;
   module Common
@@ -13,16 +12,11 @@ module LogStash; module Outputs; class ElasticSearch;
       setup_hosts # properly sets @hosts
       build_client
       install_template
-      setup_buffer_and_handler
       check_action_validity
 
       @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts)
     end
 
-    def receive(event)
-      @buffer << event_action_tuple(event)
-    end
-
     # Receive an array of events and immediately attempt to index them (no buffering)
     def multi_receive(events)
       events.each_slice(@flush_size) do |slice|
@@ -37,10 +31,6 @@ module LogStash; module Outputs; class ElasticSearch;
       [action, params, event]
     end
 
-    def flush
-      @buffer.flush
-    end
-
     def setup_hosts
       @hosts = Array(@hosts)
       if @hosts.empty?
@@ -53,12 +43,6 @@ module LogStash; module Outputs; class ElasticSearch;
       TemplateManager.install_template(self)
     end
 
-    def setup_buffer_and_handler
-      @buffer = ::LogStash::Outputs::ElasticSearch::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions|
-        retrying_submit(actions)
-      end
-    end
-
     def check_action_validity
       raise LogStash::ConfigurationError, "No action specified!" unless @action
 
@@ -75,33 +59,55 @@ module LogStash; module Outputs; class ElasticSearch;
       VALID_HTTP_ACTIONS
     end
 
-    def retrying_submit(actions)
+    def retrying_submit(actions)
       # Initially we submit the full list of actions
       submit_actions = actions
 
+      sleep_interval = @retry_initial_interval
+
       while submit_actions && submit_actions.length > 0
-
+
         # We retry with whatever is didn't succeed
         begin
           submit_actions = submit(submit_actions)
+          if submit_actions && submit_actions.size > 0
+            @logger.error("Retrying individual actions")
+            submit_actions.each {|action| @logger.error("Action", action) }
+          end
         rescue => e
-          @logger.
-            :
+          @logger.error("Encountered an unexpected error submitting a bulk request! Will retry.",
+                        :error_message => e.message,
                         :class => e.class.name,
                         :backtrace => e.backtrace)
         end
 
-
+        # Everything was a success!
+        break if !submit_actions || submit_actions.empty?
+
+        # If we're retrying the action sleep for the recommended interval
+        # Double the interval for the next time through to achieve exponential backoff
+        Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
+        sleep_interval = next_sleep_interval(sleep_interval)
       end
     end
 
-    def
-
+    def sleep_for_interval(sleep_interval)
+      Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
+      next_sleep_interval(sleep_interval)
+    end
 
-
+    def next_sleep_interval(current_interval)
+      doubled = current_interval * 2
+      doubled > @retry_max_interval ? @retry_max_interval : doubled
+    end
 
-
-
+    def submit(actions)
+      bulk_response = safe_bulk(actions)
+
+      # If the response is nil that means we were in a retry loop
+      # and aborted since we're shutting down
+      # If it did return and there are no errors we're good as well
+      return if bulk_response.nil? || !bulk_response["errors"]
 
       actions_to_retry = []
       bulk_response["items"].each_with_index do |response,idx|
@@ -168,38 +174,64 @@ module LogStash; module Outputs; class ElasticSearch;
     end
 
     # Rescue retryable errors during bulk submission
-    def safe_bulk(
-      @
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def safe_bulk(actions)
+      sleep_interval = @retry_initial_interval
+      begin
+        es_actions = actions.map {|action_type, params, event| [action_type, params, event.to_hash]}
+        response = @client.bulk(es_actions)
+        response
+      rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::HostUnreachableError => e
+        # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
+        # and let the user sort it out from there
+        @logger.error(
+          "Attempted to send a bulk request to elasticsearch'"+
+          " but Elasticsearch appears to be unreachable or down!",
+          :error_message => e.message,
+          :class => e.class.name,
+          :will_retry_in_seconds => sleep_interval
+        )
+        @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+        # We retry until there are no errors! Errors should all go to the retry queue
+        sleep_interval = sleep_for_interval(sleep_interval)
+        retry unless @stopping.true?
+      rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::NoConnectionAvailableError => e
+        @logger.error(
+          "Attempted to send a bulk request to elasticsearch, but no there are no living connections in the connection pool. Perhaps Elasticsearch is unreachable or down?",
+          :error_message => e.message,
+          :class => e.class.name,
+          :will_retry_in_seconds => sleep_interval
+        )
+        Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
+        sleep_interval = next_sleep_interval(sleep_interval)
+        retry unless @stopping.true?
+      rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError => e
+        if RETRYABLE_CODES.include?(e.response_code)
+          log_hash = {:code => e.response_code, :url => e.url}
+          log_hash[:body] = e.body if @logger.debug? # Generally this is too verbose
+          @logger.error("Attempted to send a bulk request to elasticsearch but received a bad HTTP response code!", log_hash)
+
+          sleep_interval = sleep_for_interval(sleep_interval)
+          retry unless @stopping.true?
+        else
+          @logger.error("Got a bad response code from server, but this code is not considered retryable. Request will be dropped", :code => e.code)
+        end
+      rescue => e
+        # Stuff that should never happen
+        # For all other errors print out full connection issues
+        @logger.error(
+          "An unknown error occurred sending a bulk request to Elasticsearch. We will retry indefinitely",
+          :error_message => e.message,
+          :error_class => e.class.name,
+          :backtrace => e.backtrace
+        )
+
+        @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+        # We retry until there are no errors! Errors should all go to the retry queue
+        sleep_interval = sleep_for_interval(sleep_interval)
+        retry unless @stopping.true?
+      end
     end
   end
 end; end; end
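The new control flow is easier to see outside the diff: `submit` returns only the actions that still need work, and `retrying_submit` loops on that shrinking set, sleeping with a doubling interval between rounds, while `safe_bulk` retries transport-level failures indefinitely. A standalone toy sketch of that contract, with plain `sleep` standing in for `Stud.stoppable_sleep` and the block playing the role of `submit`:

    def retrying_submit(actions, initial_interval: 2, max_interval: 64)
      submit_actions = actions
      sleep_interval = initial_interval

      while submit_actions && !submit_actions.empty?
        submit_actions = yield(submit_actions)  # only the failed subset comes back
        break if submit_actions.nil? || submit_actions.empty?

        sleep(sleep_interval)                   # the plugin uses Stud.stoppable_sleep
        doubled = sleep_interval * 2            # exponential backoff, capped
        sleep_interval = doubled > max_interval ? max_interval : doubled
      end
    end

    # Toy run: one action "succeeds" per round until the batch drains.
    retrying_submit([:index_a, :index_b, :index_c], initial_interval: 0.1) do |batch|
      batch.drop(1)
    end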
data/lib/logstash/outputs/elasticsearch/common_configs.rb
CHANGED
@@ -6,16 +6,10 @@ module LogStash; module Outputs; class ElasticSearch
     # delete old data or only search specific date ranges.
     # Indexes may not contain uppercase characters.
     # For weekly indexes ISO 8601 format is recommended, eg. logstash-%{+xxxx.ww}.
-    # LS uses Joda to format the index pattern from event timestamp.
-    # Joda formats are defined http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
+    # LS uses Joda to format the index pattern from event timestamp.
+    # Joda formats are defined http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
     mod.config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
 
-    # The index type to write events to. Generally you should try to write only
-    # similar events to the same 'type'. String expansion `%{foo}` works here.
-    #
-    # Deprecated in favor of `docoument_type` field.
-    mod.config :index_type, :validate => :string, :obsolete => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
-
     # The document type to write events to. Generally you should try to write only
     # similar events to the same 'type'. String expansion `%{foo}` works here.
     # Unless you set 'document_type', the event 'type' will be used if it exists
@@ -81,28 +75,12 @@ module LogStash; module Outputs; class ElasticSearch
     # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
     mod.config :hosts, :validate => :array, :default => ["127.0.0.1"]
 
-    mod.config :host, :obsolete => "Please use the 'hosts' setting instead. You can specify multiple entries separated by comma in 'host:port' format."
-
-    # The port setting is obsolete. Please use the 'hosts' setting instead.
-    # Hosts entries can be in "host:port" format.
-    mod.config :port, :obsolete => "Please use the 'hosts' setting instead. Hosts entries can be in 'host:port' format."
-
     # This plugin uses the bulk index API for improved indexing performance.
-    #
+    # This setting defines the maximum sized bulk request Logstash will make
     # You you may want to increase this to be in line with your pipeline's batch size.
     # If you specify a number larger than the batch size of your pipeline it will have no effect,
     # save for the case where a filter increases the size of an inflight batch by outputting
     # events.
-    #
-    # In Logstashes <= 2.1 this plugin uses its own internal buffer of events.
-    # This config option sets that size. In these older logstashes this size may
-    # have a significant impact on heap usage, whereas in 2.2+ it will never increase it.
-    # To make efficient bulk API calls, we will buffer a certain number of
-    # events before flushing that out to Elasticsearch. This setting
-    # controls how many events will be buffered before sending a batch
-    # of events. Increasing the `flush_size` has an effect on Logstash's heap size.
-    # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big documents
-    # or have increased the `flush_size` to a higher value.
     mod.config :flush_size, :validate => :number, :default => 500
 
     # The amount of time since last flush before a flush is forced.
@@ -124,8 +102,8 @@ module LogStash; module Outputs; class ElasticSearch
     # Create a new document with source if `document_id` doesn't exist in Elasticsearch
     mod.config :doc_as_upsert, :validate => :boolean, :default => false
 
-    #
-    mod.config :max_retries, :
+    #Obsolete since 4.1.0
+    mod.config :max_retries, :obsolete => "This setting no longer does anything. Please remove it from your config"
 
     # Set script name for scripted update mode
     mod.config :script, :validate => :string, :default => ""
@@ -145,12 +123,14 @@ module LogStash; module Outputs; class ElasticSearch
     # if enabled, script is in charge of creating non-existent document (scripted update)
     mod.config :scripted_upsert, :validate => :boolean, :default => false
 
-    # Set
-    mod.config :
+    # Set initial interval in seconds between bulk retries. Doubled on each retry up to `retry_max_interval`
+    mod.config :retry_initial_interval, :validate => :number, :default => 2
+
+    # Set max interval in seconds between bulk retries.
+    mod.config :retry_max_interval, :validate => :number, :default => 64
 
-    #
-
-    mod.config :retry_max_items, :validate => :number, :default => 500, :deprecated => true
+    #Obsolete since 4.1.0
+    mod.config :retry_max_items, :obsolete => "This setting no longer does anything. Please remove it from your config"
 
     # The number of times Elasticsearch should internally retry an update/upserted document
     # See the https://www.elastic.co/guide/en/elasticsearch/guide/current/partial-updates.html[partial updates]
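Given the defaults above, the sleep between consecutive bulk retries starts at `retry_initial_interval` and doubles until it pins at `retry_max_interval`. A quick sketch of the resulting schedule:

    interval = 2   # retry_initial_interval default
    schedule = (1..8).map { v = interval; interval = [interval * 2, 64].min; v }
    p schedule     # => [2, 4, 8, 16, 32, 64, 64, 64]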
data/lib/logstash/outputs/elasticsearch/http_client/manticore_adapter.rb
ADDED
@@ -0,0 +1,63 @@
+require 'manticore'
+
+module LogStash; module Outputs; class ElasticSearch; class HttpClient;
+  class ManticoreAdapter
+    attr_reader :manticore, :logger
+
+    def initialize(logger, options={})
+      @logger = logger
+      @options = options || {}
+      @options[:ssl] = @options[:ssl] || {}
+
+      # We manage our own retries directly, so let's disable them here
+      @options[:automatic_retries] = 0
+      # We definitely don't need cookies
+      @options[:cookies] = false
+
+      @request_options = @options[:headers] ? {:headers => @options[:headers]} : {}
+      @manticore = ::Manticore::Client.new(@options)
+    end
+
+    def client
+      @manticore
+    end
+
+    # Performs the request by invoking {Transport::Base#perform_request} with a block.
+    #
+    # @return [Response]
+    # @see Transport::Base#perform_request
+    #
+    def perform_request(url, method, path, params={}, body=nil)
+
+
+      params = (params || {}).merge @request_options
+      params[:body] = body if body
+      url_and_path = (url + path).to_s # Convert URI object to string
+
+
+      resp = @manticore.send(method.downcase, url_and_path, params)
+
+      # Manticore returns lazy responses by default
+      # We want to block for our usage, this will wait for the repsonse
+      # to finish
+      resp.call
+
+      # 404s are excluded because they are valid codes in the case of
+      # template installation. We might need a better story around this later
+      # but for our current purposes this is correct
+      if resp.code < 200 || resp.code > 299 && resp.code != 404
+        raise ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError.new(resp.code, url_and_path, body)
+      end
+
+      resp
+    end
+
+    def close
+      @manticore.close
+    end
+
+    def host_unreachable_exceptions
+      [::Manticore::Timeout,::Manticore::SocketException, ::Manticore::ClientProtocolException, ::Manticore::ResolutionFailure, Manticore::SocketTimeout]
+    end
+  end
+end; end; end; end
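One subtlety in `perform_request` above: Ruby's `&&` binds tighter than `||`, so the status check parses as `(code < 200) || (code > 299 && code != 404)`. That lets 2xx and 404 responses through (404 is legitimate when probing for an existing template) and raises `BadResponseCodeError` for everything else. A tiny standalone check with a hypothetical `bad_code?` helper:

    def bad_code?(code)
      code < 200 || code > 299 && code != 404  # same precedence as the adapter
    end

    p [200, 201, 404, 301, 400, 500].map { |c| [c, bad_code?(c)] }
    # => [[200, false], [201, false], [404, false], [301, true], [400, true], [500, true]]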
data/lib/logstash/outputs/elasticsearch/http_client/pool.rb
ADDED
@@ -0,0 +1,378 @@
+module LogStash; module Outputs; class ElasticSearch; class HttpClient;
+  class Pool
+    class NoConnectionAvailableError < Error; end
+    class BadResponseCodeError < Error
+      attr_reader :url, :response_code, :body
+
+      def initialize(response_code, url, body)
+        @response_code = response_code
+        @url = url
+        @body = body
+      end
+
+      def message
+        "Got response code '#{response_code}' contact Elasticsrearch at URL '#{@url}'"
+      end
+    end
+    class HostUnreachableError < Error;
+      attr_reader :original_error, :url
+
+      def initialize(original_error, url)
+        @original_error = original_error
+        @url = url
+      end
+
+      def message
+        "Elasticsearch Unreachable: [#{@url}][#{original_error.class}] #{original_error.message}"
+      end
+    end
+
+    attr_reader :logger, :adapter, :sniffing, :sniffer_delay, :resurrect_delay, :auth, :healthcheck_path
+
+    DEFAULT_OPTIONS = {
+      :healthcheck_path => '/'.freeze,
+      :scheme => 'http',
+      :resurrect_delay => 5,
+      :auth => nil, # Can be set to {:user => 'user', :password => 'pass'}
+      :sniffing => false,
+      :sniffer_delay => 10,
+    }.freeze
+
+    def initialize(logger, adapter, initial_urls=[], options={})
+      @logger = logger
+      @adapter = adapter
+
+      DEFAULT_OPTIONS.merge(options).tap do |merged|
+        @healthcheck_path = merged[:healthcheck_path]
+        @scheme = merged[:scheme]
+        @resurrect_delay = merged[:resurrect_delay]
+        @auth = merged[:auth]
+        @sniffing = merged[:sniffing]
+        @sniffer_delay = merged[:sniffer_delay]
+      end
+
+      # Override the scheme if one is explicitly set in urls
+      if initial_urls.any? {|u| u.scheme == 'https'} && @scheme == 'http'
+        raise ArgumentError, "HTTP was set as scheme, but an HTTPS URL was passed in!"
+      end
+
+      # Used for all concurrent operations in this class
+      @state_mutex = Mutex.new
+
+      # Holds metadata about all URLs
+      @url_info = {}
+      @stopping = false
+
+      update_urls(initial_urls)
+      start_resurrectionist
+      start_sniffer if @sniffing
+    end
+
+    def close
+      @state_mutex.synchronize { @stopping = true }
+
+      logger.debug "Stopping sniffer"
+      stop_sniffer
+
+      logger.debug "Stopping resurrectionist"
+      stop_resurrectionist
+
+      logger.debug "Waiting for in use manticore connections"
+      wait_for_in_use_connections
+
+      logger.debug("Closing adapter #{@adapter}")
+      @adapter.close
+    end
+
+    def wait_for_in_use_connections
+      until in_use_connections.empty?
+        logger.info "Blocked on shutdown to in use connections #{@state_mutex.synchronize {@url_info}}"
+        sleep 1
+      end
+    end
+
+    def in_use_connections
+      @state_mutex.synchronize { @url_info.values.select {|v| v[:in_use] > 0 } }
+    end
+
+    def alive_urls_count
+      @state_mutex.synchronize { @url_info.values.select {|v| !v[:dead] }.count }
+    end
+
+    def url_info
+      @state_mutex.synchronize { @url_info }
+    end
+
+    def urls
+      url_info.keys
+    end
+
+    def until_stopped(task_name, delay)
+      last_done = Time.now
+      until @state_mutex.synchronize { @stopping }
+        begin
+          now = Time.now
+          if (now - last_done) >= delay
+            last_done = now
+            yield
+          end
+          sleep 1
+        rescue => e
+          logger.warn(
+            "Error while performing #{task_name}",
+            :error_message => e.message,
+            :class => e.class.name,
+            :backtrace => e.backtrace
+          )
+        end
+      end
+    end
+
+    def start_sniffer
+      @sniffer = Thread.new do
+        until_stopped("sniffing", sniffer_delay) do
+          begin
+            sniff!
+          rescue NoConnectionAvailableError => e
+            @state_mutex.synchronize { # Synchronize around @url_info
+              logger.warn("Elasticsearch output attempted to sniff for new connections but cannot. No living connections are detected. Pool contains the following current URLs", :url_info => @url_info) }
+          end
+        end
+      end
+    end
+
+    # Sniffs the cluster then updates the internal URLs
+    def sniff!
+      update_urls(check_sniff)
+    end
+
+    ES1_SNIFF_RE_URL = /\[([^\/]*)?\/?([^:]*):([0-9]+)\]/
+    ES2_SNIFF_RE_URL = /([^\/]*)?\/?([^:]*):([0-9]+)/
+    # Sniffs and returns the results. Does not update internal URLs!
+    def check_sniff
+      url, resp = perform_request(:get, '_nodes')
+      parsed = LogStash::Json.load(resp.body)
+      parsed['nodes'].map do |id,info|
+        # TODO Make sure this works with shield. Does that listed
+        # stuff as 'https_address?'
+        addr_str = info['http_address'].to_s
+        next unless addr_str # Skip hosts with HTTP disabled
+
+
+        # Only connect to nodes that serve data
+        # this will skip connecting to client, tribe, and master only nodes
+        # Note that if 'attributes' is NOT set, then that's just a regular node
+        # with master + data + client enabled, so we allow that
+        attributes = info['attributes']
+        next if attributes && attributes['data'] == 'false'
+
+        matches = addr_str.match(ES1_SNIFF_RE_URL) || addr_str.match(ES2_SNIFF_RE_URL)
+        if matches
+          host = matches[1].empty? ? matches[2] : matches[1]
+          port = matches[3]
+          URI.parse("#{@scheme}://#{host}:#{port}")
+        end
+      end.compact
+    end
+
+    def stop_sniffer
+      @sniffer.join if @sniffer
+    end
+
+    def sniffer_alive?
+      @sniffer ? @sniffer.alive? : nil
+    end
+
+    def start_resurrectionist
+      @resurrectionist = Thread.new do
+        until_stopped("resurrection", @resurrect_delay) do
+          resurrect_dead!
+        end
+      end
+    end
+
+    def resurrect_dead!
+      # Try to keep locking granularity low such that we don't affect IO...
+      @state_mutex.synchronize { @url_info.select {|url,meta| meta[:dead] } }.each do |url,meta|
+        begin
+          @logger.info("Checking url #{url} with path #{@healthcheck_path} to see if node resurrected")
+          perform_request_to_url(url, "HEAD", @healthcheck_path)
+          # If no exception was raised it must have succeeded!
+          logger.warn("Resurrected connection to dead ES instance at #{url}")
+          @state_mutex.synchronize { meta[:dead] = false }
+        rescue HostUnreachableError => e
+          logger.debug("Attempted to resurrect connection to dead ES instance at #{url}, got an error [#{e.class}] #{e.message}")
+        end
+      end
+    end
+
+    def stop_resurrectionist
+      @resurrectionist.join
+    end
+
+    def resurrectionist_alive?
+      @resurrectionist.alive?
+    end
+
+    def perform_request(method, path, params={}, body=nil)
+      with_connection do |url|
+        resp = perform_request_to_url(url, method, path, params, body)
+        [url, resp]
+      end
+    end
+
+    [:get, :put, :post, :delete, :patch, :head].each do |method|
+      define_method(method) do |path, params={}, body=nil|
+        perform_request(method, path, params, body)
+      end
+    end
+
+    def perform_request_to_url(url, method, path, params={}, body=nil)
+      res = @adapter.perform_request(url, method, path, params, body)
+    rescue *@adapter.host_unreachable_exceptions => e
+      raise HostUnreachableError.new(e, url), "Could not reach host #{e.class}: #{e.message}"
+    end
+
+    def normalize_url(uri)
+      raise ArgumentError, "Only URI objects may be passed in!" unless uri.is_a?(URI)
+      uri = uri.clone
+
+      # Set credentials if need be
+      if @auth && !uri.user
+        uri.user ||= @auth[:user]
+        uri.password ||= @auth[:password]
+      end
+
+      uri.scheme = @scheme
+
+      uri
+    end
+
+    def update_urls(new_urls)
+      # Normalize URLs
+      new_urls = new_urls.map(&method(:normalize_url))
+
+      # Used for logging nicely
+      state_changes = {:removed => [], :added => []}
+      @state_mutex.synchronize do
+        # Add new connections
+        new_urls.each do |url|
+          # URI objects don't have real hash equality! So, since this isn't perf sensitive we do a linear scan
+          unless @url_info.keys.include?(url)
+            state_changes[:added] << url.to_s
+            add_url(url)
+          end
+        end
+
+        # Delete connections not in the new list
+        @url_info.each do |url,_|
+          unless new_urls.include?(url)
+            state_changes[:removed] << url.to_s
+            remove_url(url)
+          end
+        end
+      end
+
+      if state_changes[:removed].size > 0 || state_changes[:added].size > 0
+        logger.info("Elasticsearch pool URLs updated", :changes => state_changes)
+      end
+    end
+
+    def size
+      @state_mutex.synchronize { @url_info.size }
+    end
+
+    def add_url(url)
+      @url_info[url] ||= empty_url_meta
+    end
+
+    def remove_url(url)
+      @url_info.delete(url)
+    end
+
+    def empty_url_meta
+      {
+        :in_use => 0,
+        :dead => false
+      }
+    end
+
+    def with_connection
+      url, url_meta = get_connection
+
+      # Custom error class used here so that users may retry attempts if they receive this error
+      # should they choose to
+      raise NoConnectionAvailableError, "No Available connections" unless url
+      yield url
+    rescue HostUnreachableError => e
+      # Mark the connection as dead here since this is likely not transient
+      mark_dead(url, e)
+      raise e
+    rescue BadResponseCodeError => e
+      # These aren't discarded from the pool because these are often very transient
+      # errors
+      raise e
+    rescue => e
+      logger.warn("UNEXPECTED POOL ERROR", :e => e)
+      raise e
+    ensure
+      return_connection(url)
+    end
+
+    def mark_dead(url, error)
+      @state_mutex.synchronize do
+        meta = @url_info[url]
+        # In case a sniff happened removing the metadata just before there's nothing to mark
+        # This is an extreme edge case, but it can happen!
+        return unless meta
+        logger.warn("Marking url as dead. Last error: [#{error.class}] #{error.message}",
+                    :url => url, :error_message => error.message, :error_class => error.class.name)
+        meta[:dead] = true
+        meta[:last_error] = error
+        meta[:last_errored_at] = Time.now
+      end
+    end
+
+    def url_meta(url)
+      @state_mutex.synchronize do
+        @url_info[url]
+      end
+    end
+
+    def get_connection
+      @state_mutex.synchronize do
+        # The goal here is to pick a random connection from the least-in-use connections
+        # We want some randomness so that we don't hit the same node over and over, but
+        # we also want more 'fair' behavior in the event of high concurrency
+        eligible_set = nil
+        lowest_value_seen = nil
+        @url_info.each do |url,meta|
+          meta_in_use = meta[:in_use]
+          next if meta[:dead]
+
+          if lowest_value_seen.nil? || meta_in_use < lowest_value_seen
+            lowest_value_seen = meta_in_use
+            eligible_set = [[url, meta]]
+          elsif lowest_value_seen == meta_in_use
+            eligible_set << [url, meta]
+          end
+        end
+
+        return nil if eligible_set.nil?
+
+        pick, pick_meta = eligible_set.sample
+        pick_meta[:in_use] += 1
+
+        [pick, pick_meta]
+      end
+    end
+
+    def return_connection(url)
+      @state_mutex.synchronize do
+        if @url_info[url] # Guard against the condition where the connection has already been deleted
+          @url_info[url][:in_use] -= 1
+        end
+      end
+    end
+  end
+end; end; end; end;
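The selection policy in `get_connection` above is easy to miss: among URLs not marked dead it keeps only those tied for the lowest `:in_use` count, then samples randomly so equally idle nodes share the load. A standalone sketch of that policy with hypothetical hosts:

    url_info = {
      "http://es1:9200" => {:in_use => 0, :dead => false},
      "http://es2:9200" => {:in_use => 0, :dead => false},
      "http://es3:9200" => {:in_use => 2, :dead => false},  # busier, skipped
      "http://es4:9200" => {:in_use => 0, :dead => true},   # dead, skipped
    }

    live     = url_info.reject { |_, meta| meta[:dead] }
    lowest   = live.values.map { |meta| meta[:in_use] }.min
    eligible = live.select { |_, meta| meta[:in_use] == lowest }

    url, meta = eligible.to_a.sample
    meta[:in_use] += 1  # the pool does this under @state_mutex; return_connection decrements

    p url  # => "http://es1:9200" or "http://es2:9200"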