logstash-output-elasticsearch 2.0.0.beta5-java → 2.0.0.beta6-java

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: fc38fea51325f4a9a5703accfc568c1d50728915
- data.tar.gz: d941aa2bf701048bb5a59df378bc6d786f27c9e9
+ metadata.gz: c3914887728f304938a7f0414212daff422424a8
+ data.tar.gz: e627c4d8f3722e30f9ff4c32298792aa14adeeeb
  SHA512:
- metadata.gz: c0fe10fdfbc87183e69930513265739e7d85424c4097cbf9e6b4c4715dc7dafc92d21bfa0e5a7eeee34c23acd480603e2a77582406a03566764334afc5a4966f
- data.tar.gz: 43216b5e2c5fb317e491b6c2ee31472add33fc936594ff1c7e7f18428b69b214a4853b274aa6ef657d19930b83be430a410ffc2325041efe7c25da3ef800f6d0
+ metadata.gz: 4e5facd0d14d905fa723901d58b952b5547a35fdd81bc1848b2552aaeb8ae0f559ef1059b2e916bc838517b3e1d21227a089a316cb2e5ee545089934c992386d
+ data.tar.gz: e788b461bdc1e24fceebec5b72a0bc16f8f1f4603cd12b1c0e0af708beccb74f202a7a99c54b16f3554072bdfba7bbeccca9294be9bf408ff4765967883ddd24
data/lib/logstash/outputs/elasticsearch/http_client.rb CHANGED
@@ -50,9 +50,7 @@ module LogStash::Outputs::Elasticsearch
  end
  end.flatten

- bulk_response = @client.bulk(:body => bulk_body)
-
- self.class.normalize_bulk_response(bulk_response)
+ @client.bulk(:body => bulk_body)
  end

  def start_sniffing!
@@ -111,28 +109,6 @@ module LogStash::Outputs::Elasticsearch
  Elasticsearch::Client.new(client_options)
  end

- def self.normalize_bulk_response(bulk_response)
- if bulk_response["errors"]
- # The structure of the response from the REST Bulk API is follows:
- # {"took"=>74, "errors"=>true, "items"=>[{"create"=>{"_index"=>"logstash-2014.11.17",
- # "_type"=>"logs",
- # "_id"=>"AUxTS2C55Jrgi-hC6rQF",
- # "_version"=>1,
- # "status"=>400,
- # "error"=>"MapperParsingException[failed to parse]..."}}]}
- # where each `item` is a hash of {OPTYPE => Hash[]}. calling first, will retrieve
- # this hash as a single array with two elements, where the value is the second element (i.first[1])
- # then the status of that item is retrieved.
- {
- "errors" => true,
- "statuses" => bulk_response["items"].map { |i| i.first[1]['status'] },
- "details" => bulk_response["items"].select {|i| i.first[1]["error"] }.map {|i| i.first[1]}
- }
- else
- {"errors" => false}
- end
- end
-
  def template_exists?(name)
  @client.indices.get_template(:name => name)
  return true
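
With `normalize_bulk_response` removed, `HttpClient#bulk` now returns the raw Elasticsearch bulk response and callers read `items` directly. For code still consuming the old normalized hash, a minimal migration sketch, reconstructed from the removed method above (not part of the gem):

    # Sketch: rebuild the removed normalization from a raw bulk response.
    # `raw` is the hash returned by Elasticsearch::Client#bulk.
    def normalize_bulk_response(raw)
      return {"errors" => false} unless raw["errors"]

      # Each item is a one-entry hash of {optype => props}; `first` yields
      # [optype, props], where props carries "status" and, on failure, "error".
      props = raw["items"].map { |item| item.first[1] }
      {
        "errors"   => true,
        "statuses" => props.map { |p| p["status"] },
        "details"  => props.select { |p| p["error"] }
      }
    end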
data/lib/logstash/outputs/elasticsearch.rb CHANGED
@@ -10,44 +10,32 @@ require "thread" # for safe queueing
  require "uri" # for escaping user input
  require "logstash/outputs/elasticsearch/http_client"

- # This output lets you store logs in Elasticsearch and is the most recommended
- # output for Logstash. If you plan on using the Kibana web interface, you'll
- # want to use this output.
+ # This plugin is the recommended method of storing logs in Elasticsearch.
+ # If you plan on using the Kibana web interface, you'll want to use this output.
  #
- # This output only speaks the HTTP, which is the preferred protocol for interacting with Elasticsearch. By default
- # Elasticsearch exposes HTTP on port 9200.
- #
- # We strongly encourage the use of HTTP over the node protocol. It is just as
- # fast and far easier to administer. For those wishing to use the java protocol please see the 'elasticsearch_java' gem.
+ # This output only speaks the HTTP protocol. HTTP is the preferred protocol for interacting with Elasticsearch as of Logstash 2.0.
+ # We strongly encourage the use of HTTP over the node protocol for a number of reasons. HTTP is only marginally slower,
+ # yet far easier to administer and work with. When using the HTTP protocol one may upgrade Elasticsearch versions without having
+ # to upgrade Logstash in lock-step. For those wishing to use the node or transport protocols please see the 'elasticsearch_java' gem.
  #
  # You can learn more about Elasticsearch at <https://www.elastic.co/products/elasticsearch>
  #
  # ==== Retry Policy
  #
- # By default all bulk requests to ES are synchronous. Not all events in the bulk requests
- # always make it successfully. For example, there could be events which are not formatted
- # correctly for the index they are targeting (type mismatch in mapping). So that we minimize loss of
- # events, we have a specific retry policy in place. We retry all events which fail to be reached by
- # Elasticsearch for network related issues. We retry specific events which exhibit errors under a separate
- # policy described below. Events of this nature are ones which experience ES error codes described as
- # retryable errors.
- #
- # *Retryable Errors:*
+ # This plugin uses the Elasticsearch bulk API to optimize its imports into Elasticsearch. These requests may experience
+ # either partial or total failures. Events are retried if they fail due to either a network error or the status codes
+ # 429 (the server is busy), 409 (Version Conflict), or 503 (temporary overloading/maintenance).
  #
- # - 429, Too Many Requests (RFC6585)
- # - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
- #
- # Here are the rules of what is retried when:
+ # The retry policy's logic can be described as follows:
  #
- # - Block and retry all events in bulk response that experiences transient network exceptions until
+ # - Block and retry all events in the bulk response that experience transient network exceptions until
  # a successful submission is received by Elasticsearch.
- # - Retry subset of sent events which resulted in ES errors of a retryable nature which can be found
- # in RETRYABLE_CODES
- # - For events which returned retryable error codes, they will be pushed onto a separate queue for
- # retrying events. events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries). The size of
- # this queue is capped by the value set in :retry_max_items.
- # - Events from the retry queue are submitted again either when the queue reaches its max size or when
- # the max interval time is reached, which is set in :retry_max_interval.
+ # - Retry the subset of sent events which resulted in ES errors of a retryable nature.
+ # - Events which returned retryable error codes will be pushed onto a separate queue for
+ # retrying events. Events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries).
+ # The size of this queue is capped by the value set in :retry_max_items.
+ # - Events from the retry queue are submitted again when the queue reaches its max size or when
+ # the max interval time is reached. The max interval time is configurable via :retry_max_interval.
  # - Events which are not retryable or have reached their max retry count are logged to stderr.
  class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
  attr_reader :client
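
The retryable status codes listed above drive the per-item handling in `submit`, which the next hunk reworks to consume the raw bulk response. A self-contained sketch of that classification, with made-up ids, statuses, and actions:

    RETRYABLE_CODES = [409, 429, 503]
    SUCCESS_CODES   = [200, 201]

    # Canned response in the shape Elasticsearch returns from the bulk API.
    bulk_response = {
      "errors" => true,
      "items"  => [
        {"create" => {"_id" => "a", "status" => 201}},
        {"create" => {"_id" => "b", "status" => 429}},
        {"create" => {"_id" => "c", "status" => 400, "error" => "MapperParsingException[failed to parse]"}}
      ]
    }
    actions = [["index", {}, {}], ["index", {}, {}], ["index", {}, {}]]

    actions_to_retry = []
    bulk_response["items"].each_with_index do |resp, idx|
      _action_type, action_props = resp.first # one-entry hash: {optype => props}
      status = action_props["status"]
      if RETRYABLE_CODES.include?(status)
        actions_to_retry << actions[idx]      # the 429 item lands here
      elsif !SUCCESS_CODES.include?(status)
        # the 400 item lands here: not retryable, logged and dropped
      end
    end
    # actions_to_retry now holds only the action for the 429 item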
@@ -360,19 +348,25 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base

  bulk_response = @client.bulk(es_actions)

- if bulk_response["errors"]
- actions_with_responses = actions.zip(bulk_response['statuses'])
- actions_to_retry = []
- actions_with_responses.each do |action, resp_code|
- if RETRYABLE_CODES.include?(resp_code)
- @logger.warn "retrying failed action with response code: #{resp_code}"
- actions_to_retry << action
- elsif not SUCCESS_CODES.include?(resp_code)
- @logger.warn "failed action with response of #{resp_code}, dropping action: #{action}"
- end
+ next unless bulk_response["errors"]
+
+ actions_to_retry = []
+
+ bulk_response["items"].each_with_index do |resp,idx|
+ action_type, action_props = resp.first
+
+ status = action_props["status"]
+ action = actions[idx]
+
+ if RETRYABLE_CODES.include?(status)
+ @logger.warn "retrying failed action with response code: #{status}"
+ actions_to_retry << action
+ elsif not SUCCESS_CODES.include?(status)
+ @logger.warn "Failed action. ", status: status, action: action, response: resp
  end
- retry_push(actions_to_retry) unless actions_to_retry.empty?
  end
+
+ retry_push(actions_to_retry) unless actions_to_retry.empty?
  end
  end

data/lib/logstash/outputs/elasticsearch.rb.orig ADDED
@@ -0,0 +1,555 @@
+ # encoding: utf-8
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/outputs/base"
+ require "logstash/json"
+ require "concurrent"
+ require "stud/buffer"
+ require "socket" # for Socket.gethostname
+ require "thread" # for safe queueing
+ require "uri" # for escaping user input
+ require "logstash/outputs/elasticsearch/http_client"
+
+ # This plugin is the recommended method of storing logs in Elasticsearch.
+ # If you plan on using the Kibana web interface, you'll want to use this output.
+ #
+ # This output only speaks the HTTP protocol. HTTP is the preferred protocol for interacting with Elasticsearch as of Logstash 2.0.
+ # We strongly encourage the use of HTTP over the node protocol for a number of reasons. HTTP is only marginally slower,
+ # yet far easier to administer and work with. When using the HTTP protocol one may upgrade Elasticsearch versions without having
+ # to upgrade Logstash in lock-step. For those wishing to use the node or transport protocols please see the 'elasticsearch_java' gem.
+ #
+ # You can learn more about Elasticsearch at <https://www.elastic.co/products/elasticsearch>
+ #
+ # ==== Retry Policy
+ #
+ # This plugin uses the Elasticsearch bulk API to optimize its imports into Elasticsearch. These requests may experience
+ # either partial or total failures. Events are retried if they fail due to either a network error or the status codes
+ # 429 (the server is busy), 409 (Version Conflict), or 503 (temporary overloading/maintenance).
+ #
+ # The retry policy's logic can be described as follows:
+ #
+ # - Block and retry all events in the bulk response that experience transient network exceptions until
+ # a successful submission is received by Elasticsearch.
+ # - Retry the subset of sent events which resulted in ES errors of a retryable nature.
+ # - Events which returned retryable error codes will be pushed onto a separate queue for
+ # retrying events. Events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries).
+ # The size of this queue is capped by the value set in :retry_max_items.
+ # - Events from the retry queue are submitted again when the queue reaches its max size or when
+ # the max interval time is reached. The max interval time is configurable via :retry_max_interval.
+ # - Events which are not retryable or have reached their max retry count are logged to stderr.
+ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
+ attr_reader :client
+
+ include Stud::Buffer
+ RETRYABLE_CODES = [409, 429, 503]
+ SUCCESS_CODES = [200, 201]
+
+ config_name "elasticsearch"
+
+ # The index to write events to. This can be dynamic using the `%{foo}` syntax.
+ # The default value will partition your indices by day so you can more easily
+ # delete old data or only search specific date ranges.
+ # Indexes may not contain uppercase characters.
+ # For weekly indexes ISO 8601 format is recommended, eg. logstash-%{+xxxx.ww}
+ config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
+
+ # The index type to write events to. Generally you should try to write only
+ # similar events to the same 'type'. String expansion `%{foo}` works here.
+ #
+ # Deprecated in favor of `document_type` field.
+ config :index_type, :validate => :string, :deprecated => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
+
+ # The document type to write events to. Generally you should try to write only
+ # similar events to the same 'type'. String expansion `%{foo}` works here.
+ # Unless you set 'document_type', the event 'type' will be used if it exists
+ # otherwise the document type will be assigned the value of 'logs'
+ config :document_type, :validate => :string
+
+ # Starting in Logstash 1.3 (unless you set option `manage_template` to false)
+ # a default mapping template for Elasticsearch will be applied, if you do not
+ # already have one set to match the index pattern defined (default of
+ # `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
+ # the template will be applied to all indices starting with `logstash-*`
+ #
+ # If you have dynamic templating (e.g. creating indices based on field names)
+ # then you should set `manage_template` to false and use the REST API to upload
+ # your templates manually.
+ config :manage_template, :validate => :boolean, :default => true
+
+ # This configuration option defines how the template is named inside Elasticsearch.
+ # Note that if you have used the template management features and subsequently
+ # change this, you will need to prune the old template manually, e.g.
+ #
+ # `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
+ #
+ # where `OldTemplateName` is whatever the former setting was.
+ config :template_name, :validate => :string, :default => "logstash"
+
+ # You can set the path to your own template here, if you so desire.
+ # If not set, the included template will be used.
+ config :template, :validate => :path
+
+ # Overwrite the current template with whatever is configured
+ # in the `template` and `template_name` directives.
+ config :template_overwrite, :validate => :boolean, :default => false
+
+ # The document ID for the index. Useful for overwriting existing entries in
+ # Elasticsearch with the same ID.
+ config :document_id, :validate => :string
+
+ # A routing override to be applied to all processed events.
+ # This can be dynamic using the `%{foo}` syntax.
+ config :routing, :validate => :string
+
+ # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `host` parameter.
+ # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
+ # `"127.0.0.1"`
+ # `["127.0.0.1:9200","127.0.0.2:9200"]`
+ # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `host` list
+ # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes.
+
+ config :hosts, :validate => :array
+
+ # You can set the remote port as part of the host, or explicitly here as well
+ config :port, :validate => :string, :default => 9200
+
+ # This plugin uses the bulk index api for improved indexing performance.
+ # To make efficient bulk api calls, we will buffer a certain number of
+ # events before flushing that out to Elasticsearch. This setting
+ # controls how many events will be buffered before sending a batch
+ # of events.
+ config :flush_size, :validate => :number, :default => 500
+
+ # The amount of time since last flush before a flush is forced.
+ #
+ # This setting helps ensure slow event rates don't get stuck in Logstash.
+ # For example, if your `flush_size` is 100, and you have received 10 events,
+ # and it has been more than `idle_flush_time` seconds since the last flush,
+ # Logstash will flush those 10 events automatically.
+ #
+ # This helps keep both fast and slow log streams moving along in
+ # near-real-time.
+ config :idle_flush_time, :validate => :number, :default => 1
+
+ # The Elasticsearch action to perform. Valid actions are: `index`, `delete`.
+ #
+ # Use of this setting *REQUIRES* you also configure the `document_id` setting
+ # because `delete` actions all require a document id.
+ #
+ # What does each action do?
+ #
+ # - index: indexes a document (an event from Logstash).
+ # - delete: deletes a document by id
+ # - create: indexes a document, fails if a document by that id already exists in the index.
+ # - update: updates a document by id
+ # following action is not supported by HTTP protocol
+ #
+ # For more details on actions, check out the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
+ config :action, :validate => %w(index delete create update), :default => "index"
+
+ # Username and password (only valid when protocol is HTTP; this setting works with HTTP or HTTPS auth)
+ config :user, :validate => :string
+ config :password, :validate => :password
+
+ # HTTP Path at which the Elasticsearch server lives. Use this if you must run ES behind a proxy that remaps
+ # the root path for the Elasticsearch HTTP API lives. This option is ignored for non-HTTP transports.
+ config :path, :validate => :string, :default => "/"
+
+ # SSL Configurations (only valid when protocol is HTTP)
+ #
+ # Enable SSL
+ config :ssl, :validate => :boolean, :default => false
+
+ # Validate the server's certificate
+ # Disabling this severely compromises security
+ # For more information read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
+ config :ssl_certificate_verification, :validate => :boolean, :default => true
+
+ # The .cer or .pem file to validate the server's certificate
+ config :cacert, :validate => :path
+
+ # The JKS truststore to validate the server's certificate
+ # Use either `:truststore` or `:cacert`
+ config :truststore, :validate => :path
+
+ # Set the truststore password
+ config :truststore_password, :validate => :password
+
+ # The keystore used to present a certificate to the server
+ # It can be either .jks or .p12
+ config :keystore, :validate => :path
+
+ # Set the truststore password
+ config :keystore_password, :validate => :password
+
+ # Enable cluster sniffing
+ # Asks host for the list of all cluster nodes and adds them to the hosts list
+ # Will return ALL nodes with HTTP enabled (including master nodes!). If you use
+ # this with master nodes, you probably want to disable HTTP on them by setting
+ # `http.enabled` to false in their elasticsearch.yml.
+ config :sniffing, :validate => :boolean, :default => false
+
+ # How long to wait, in seconds, between sniffing attempts
+ config :sniffing_delay, :validate => :number, :default => 30
+
+ # Set max retry for each event
+ config :max_retries, :validate => :number, :default => 3
+
+ # Set retry policy for events that failed to send
+ config :retry_max_items, :validate => :number, :default => 5000
+
+ # Set max interval between bulk retries
+ config :retry_max_interval, :validate => :number, :default => 5
+
+ # Set the address of a forward HTTP proxy. Must be used with the 'http' protocol
+ # Can be either a string, such as 'http://localhost:123' or a hash in the form
+ # {host: 'proxy.org' port: 80 scheme: 'http'}
+ # Note, this is NOT a SOCKS proxy, but a plain HTTP proxy
+ config :proxy
+
+ # Enable doc_as_upsert for update mode
+ # create a new document with source if document_id doesn't exists
+ config :doc_as_upsert, :validate => :boolean, :default => false
+
+ # Set upsert content for update mode
+ # create a new document with this parameter as json string if document_id doesn't exists
+ config :upsert, :validate => :string, :default => ""
+
+ public
+ def register
+ @hosts = Array(@hosts)
+ # retry-specific variables
+ @retry_flush_mutex = Mutex.new
+ @retry_teardown_requested = Concurrent::AtomicBoolean.new(false)
+ # needs flushing when interval
+ @retry_queue_needs_flushing = ConditionVariable.new
+ @retry_queue_not_full = ConditionVariable.new
+ @retry_queue = Queue.new
+ @submit_mutex = Mutex.new
+
+ client_settings = {}
+ common_options = {
+ :client_settings => client_settings,
+ :sniffing => @sniffing,
+ :sniffing_delay => @sniffing_delay
+ }
+
+ client_settings[:path] = "/#{@path}/".gsub(/\/+/, "/") # Normalize slashes
+ @logger.debug? && @logger.debug("Normalizing http path", :path => @path, :normalized => client_settings[:path])
+
+ if @hosts.nil? || @hosts.empty?
+ @logger.info("No 'host' set in elasticsearch output. Defaulting to localhost")
+ @hosts = ["localhost"]
+ end
+
+ client_settings.merge! setup_ssl()
+ client_settings.merge! setup_proxy()
+ common_options.merge! setup_basic_auth()
+
+ # Update API setup
+ update_options = {
+ :upsert => @upsert,
+ :doc_as_upsert => @doc_as_upsert
+ }
+ common_options.merge! update_options if @action == 'update'
+
+ @client = LogStash::Outputs::Elasticsearch::HttpClient.new(
+ common_options.merge(:hosts => @hosts, :port => @port)
+ )
+
+ if @manage_template
+ begin
+ @logger.info("Automatic template management enabled", :manage_template => @manage_template.to_s)
+ @client.template_install(@template_name, get_template, @template_overwrite)
+ rescue => e
+ @logger.error("Failed to install template: #{e.message}")
+ end
+ end
+
+ @logger.info("New Elasticsearch output", :hosts => @hosts, :port => @port)
+
+ @client_idx = 0
+
+ buffer_initialize(
+ :max_items => @flush_size,
+ :max_interval => @idle_flush_time,
+ :logger => @logger
+ )
+
+ @retry_timer_thread = Thread.new do
+ loop do
+ sleep(@retry_max_interval)
+ @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
+ end
+ end
+
+ @retry_thread = Thread.new do
+ while @retry_teardown_requested.false?
+ @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.wait(@retry_flush_mutex) }
+ retry_flush
+ end
+ end
+ end # def register
+
+ public
+ def get_template
+ if @template.nil?
+ @template = ::File.expand_path('elasticsearch/elasticsearch-template.json', ::File.dirname(__FILE__))
+ if !File.exists?(@template)
+ raise "You must specify 'template => ...' in your elasticsearch output (I looked for '#{@template}')"
+ end
+ end
+ template_json = IO.read(@template).gsub(/\n/,'')
+ template = LogStash::Json.load(template_json)
+ @logger.info("Using mapping template", :template => template)
+ return template
+ end # def get_template
+
+ public
+ def receive(event)
+ return unless output?(event)
+
+ # block until we have not maxed out our
+ # retry queue. This is applying back-pressure
+ # to slow down the receive-rate
+ @retry_flush_mutex.synchronize {
+ @retry_queue_not_full.wait(@retry_flush_mutex) while @retry_queue.size > @retry_max_items
+ }
+
+ event['@metadata']['retry_count'] = 0
+
+ # Set the 'type' value for the index.
+ type = if @document_type
+ event.sprintf(@document_type)
+ elsif @index_type # deprecated
+ event.sprintf(@index_type)
+ else
+ event["type"] || "logs"
+ end
+
+ params = {
+ :_id => @document_id ? event.sprintf(@document_id) : nil,
+ :_index => event.sprintf(@index),
+ :_type => type,
+ :_routing => @routing ? event.sprintf(@routing) : nil
+ }
+
+ params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @action == 'update' && @upsert != ""
+
+ buffer_receive([event.sprintf(@action), params, event])
+ end # def receive
+
+ public
+ # The submit method can be called from both the
+ # Stud::Buffer flush thread and from our own retry thread.
+ def submit(actions)
+ @submit_mutex.synchronize do
+ es_actions = actions.map { |a, doc, event| [a, doc, event.to_hash] }
+
+ bulk_response = @client.bulk(es_actions)
+
+ <<<<<<< c2b39d4ff5a5ced59a9e7229fc135cf2a966bdc4
+ if bulk_response["errors"]
+ actions_to_retry = []
+
+ bulk_response['items'].each_with_index do |item,idx|
+ action = es_actions[idx]
+ action_type, props = item.first # These are all hashes with one value, so we destructure them here
+
+ status = props['status']
+ error = props['error']
+
+ if RETRYABLE_CODES.include?(status)
+ @logger.warn "retrying failed action with response code: #{status}"
+ actions_to_retry << action
+ elsif not SUCCESS_CODES.include?(status)
+ @logger.warn "failed action", status: status, error: error, action: action
+ end
+ end
+
+ retry_push(actions_to_retry) unless actions_to_retry.empty?
+ =======
+ next unless bulk_response["errors"]
+
+ actions_to_retry = []
+
+ bulk_response["items"].each_with_index do |resp,idx|
+ action_type, action_props = resp.first
+
+ status = action_props["status"]
+ action = es_actions[idx]
+
+ if RETRYABLE_CODES.include?(status)
+ @logger.warn "retrying failed action with response code: #{status}"
+ actions_to_retry << action
+ elsif not SUCCESS_CODES.include?(status)
+ @logger.warn "failed action with response of #{status}, dropping action: #{action}"
+ end
+ >>>>>>> WIP for better retry errors
+ end
+
+ retry_push(actions_to_retry) unless actions_to_retry.empty?
+ end
+ end
+
+ # When there are exceptions raised upon submission, we raise an exception so that
+ # Stud::Buffer will retry to flush
+ public
+ def flush(actions, teardown = false)
+ begin
+ submit(actions)
+ rescue Manticore::SocketException => e
+ # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
+ # and let the user sort it out from there
+ @logger.error(
+ "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}',"+
+ " but Elasticsearch appears to be unreachable or down!",
+ :client_config => @client.client_options,
+ :error_message => e.message
+ )
+ @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+ rescue => e
+ # For all other errors print out full connection issues
+ @logger.error(
+ "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+ " but an error occurred and it failed! Are you sure you can reach elasticsearch from this machine using " +
+ "the configuration provided?",
+ :client_config => @client.client_options,
+ :error_message => e.message,
+ :error_class => e.class.name,
+ :backtrace => e.backtrace
+ )
+
+ @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+ raise e
+ end
+ end # def flush
+
+ public
+ def teardown
+ @client.stop_sniffing!
+
+ @retry_teardown_requested.make_true
+ # First, make sure retry_timer_thread is stopped
+ # to ensure we do not signal a retry based on
+ # the retry interval.
+ Thread.kill(@retry_timer_thread)
+ @retry_timer_thread.join
+ # Signal flushing in the case that #retry_flush is in
+ # the process of waiting for a signal.
+ @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
+ # Now, #retry_flush is ensured to not be in a state of
+ # waiting and can be safely joined into the main thread
+ # for further final execution of an in-process remaining call.
+ @retry_thread.join
+
+ # execute any final actions along with a proceeding retry for any
+ # final actions that did not succeed.
+ buffer_flush(:final => true)
+ retry_flush
+ end
+
+ private
+ def setup_proxy
+ return {} unless @proxy
+
+ # Symbolize keys
+ proxy = if @proxy.is_a?(Hash)
+ Hash[@proxy.map {|k,v| [k.to_sym, v]}]
+ elsif @proxy.is_a?(String)
+ @proxy
+ else
+ raise LogStash::ConfigurationError, "Expected 'proxy' to be a string or hash, not '#{@proxy}''!"
+ end
+
+ return {:proxy => proxy}
+ end
+
+ private
+ def setup_ssl
+ return {} unless @ssl
+
+ if @cacert && @truststore
+ raise(LogStash::ConfigurationError, "Use either \"cacert\" or \"truststore\" when configuring the CA certificate") if @truststore
+ end
+
+ ssl_options = {}
+
+ if @cacert
+ ssl_options[:ca_file] = @cacert
+ elsif @truststore
+ ssl_options[:truststore_password] = @truststore_password.value if @truststore_password
+ end
+
+ ssl_options[:truststore] = @truststore if @truststore
+ if @keystore
+ ssl_options[:keystore] = @keystore
+ ssl_options[:keystore_password] = @keystore_password.value if @keystore_password
+ end
+ if @ssl_certificate_verification == false
+ @logger.warn [
+ "** WARNING ** Detected UNSAFE options in elasticsearch output configuration!",
+ "** WARNING ** You have enabled encryption but DISABLED certificate verification.",
+ "** WARNING ** To make sure your data is secure change :ssl_certificate_verification to true"
+ ].join("\n")
+ ssl_options[:verify] = false
+ end
+ { ssl: ssl_options }
+ end
+
+ private
+ def setup_basic_auth
+ return {} unless @user && @password
+
+ {
+ :user => ::URI.escape(@user, "@:"),
+ :password => ::URI.escape(@password.value, "@:")
+ }
+ end
+
+ private
+ # in charge of submitting any actions in @retry_queue that need to be
+ # retried
+ #
+ # This method is not called concurrently. It is only called by @retry_thread
+ # and once that thread is ended during the teardown process, a final call
+ # to this method is done upon teardown in the main thread.
+ def retry_flush()
+ unless @retry_queue.empty?
+ buffer = @retry_queue.size.times.map do
+ next_action, next_doc, next_event = @retry_queue.pop
+ next_event['@metadata']['retry_count'] += 1
+
+ if next_event['@metadata']['retry_count'] > @max_retries
+ @logger.error "too many attempts at sending event. dropping: #{next_event}"
+ nil
+ else
+ [next_action, next_doc, next_event]
+ end
+ end.compact
+
+ submit(buffer) unless buffer.empty?
+ end
+
+ @retry_flush_mutex.synchronize {
+ @retry_queue_not_full.signal if @retry_queue.size < @retry_max_items
+ }
+ end
+
+ private
+ def retry_push(actions)
+ Array(actions).each{|action| @retry_queue << action}
+ @retry_flush_mutex.synchronize {
+ @retry_queue_needs_flushing.signal if @retry_queue.size >= @retry_max_items
+ }
+ end
+
+ @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-elasticsearch-/ }
+
+ @@plugins.each do |plugin|
+ name = plugin.name.split('-')[-1]
+ require "logstash/outputs/elasticsearch/#{name}"
+ end
+
+ end # class LogStash::Outputs::Elasticsearch
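
Two notes on the file above. First, it retains unresolved merge conflict markers in `submit` (`<<<<<<<`/`=======`/`>>>>>>>`); together with the `.orig` name this appears to be a leftover merge artifact that was packaged by accident, and nothing in the gem requires it at runtime. Second, the `upsert` setting only takes effect for `action => update`: `receive` parses it and attaches it to the bulk action params. A minimal sketch of that step with stand-in values (`JSON.parse` standing in for `LogStash::Json.load`, literal strings standing in for `event.sprintf` results):

    require "json"

    action      = "update"
    upsert_json = '{"counter": 0}'        # would come from the `upsert` setting

    params = {
      :_id      => "doc-1",               # from `document_id`
      :_index   => "logstash-2015.09.15", # from `index`
      :_type    => "logs",
      :_routing => nil
    }

    # Mirrors: params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert))
    params[:_upsert] = JSON.parse(upsert_json) if action == "update" && upsert_json != ""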
data/logstash-output-elasticsearch.gemspec CHANGED
@@ -1,7 +1,7 @@
  Gem::Specification.new do |s|

  s.name = 'logstash-output-elasticsearch'
- s.version = '2.0.0.beta5'
+ s.version = '2.0.0.beta6'
  s.licenses = ['apache-2.0']
  s.summary = "Logstash Output to Elasticsearch"
  s.description = "Output events to elasticsearch"
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
  s.require_paths = ["lib"]

  # Files
- s.files = `git ls-files`.split($\)
+ s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -21,7 +21,7 @@ Gem::Specification.new do |s|

  # Gem dependencies
  s.add_runtime_dependency 'concurrent-ruby'
- s.add_runtime_dependency 'elasticsearch', ['>= 1.0.10', '~> 1.0']
+ s.add_runtime_dependency 'elasticsearch', ['>= 1.0.13', '~> 1.0']
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
  s.add_runtime_dependency 'cabin', ['~> 0.6']
  s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0'
@@ -11,8 +11,18 @@ describe "failures in bulk class expected behavior", :integration => true do
  let(:max_retries) { 3 }

  def mock_actions_with_response(*resp)
- LogStash::Outputs::Elasticsearch::HttpClient
- .any_instance.stub(:bulk).and_return(*resp)
+ expanded_responses = resp.map do |resp|
+ items = resp["statuses"] && resp["statuses"].map do |status|
+ {"create" => {"status" => status, "error" => "Error for #{status}"}}
+ end
+
+ {
+ "errors" => resp["errors"],
+ "items" => items
+ }
+ end
+
+ allow_any_instance_of(LogStash::Outputs::Elasticsearch::HttpClient).to receive(:bulk).and_return(*expanded_responses)
  end

  subject! do
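
The reworked helper expands a compact list of statuses into full bulk-response items before stubbing `bulk`. A hypothetical call in this spec's style:

    # First stubbed call to `bulk` reports a retryable 429; the second, success.
    mock_actions_with_response(
      {"errors" => true, "statuses" => [429]},
      {"errors" => false}
    )
    # The first hash expands to:
    #   {"errors" => true,
    #    "items"  => [{"create" => {"status" => 429, "error" => "Error for 429"}}]}
    # The second has no "statuses" key, so its "items" is nil.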
@@ -22,7 +32,7 @@ describe "failures in bulk class expected behavior", :integration => true do
  "template_overwrite" => true,
  "hosts" => get_host(),
  "port" => get_port(),
- "retry_max_items" => 10,
+ "retry_max_items" => 2,
  "retry_max_interval" => 1,
  "max_retries" => max_retries
  }
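
These settings exercise the flush cadence documented in the plugin: the retry queue is flushed once it reaches `retry_max_items`, or every `retry_max_interval` seconds, whichever comes first. A self-contained sketch of that two-trigger pattern (illustrative names and values, not the plugin's code):

    require "thread"

    mutex          = Mutex.new
    needs_flushing = ConditionVariable.new
    queue          = Queue.new
    max_items      = 2  # mirrors "retry_max_items" => 2 above
    max_interval   = 1  # mirrors "retry_max_interval" => 1 above

    # Trigger 1: a timer thread signals every max_interval seconds.
    timer = Thread.new do
      loop do
        sleep(max_interval)
        mutex.synchronize { needs_flushing.signal }
      end
    end

    # Trigger 2: pushing signals early once the queue reaches max_items.
    push = lambda do |action|
      queue << action
      mutex.synchronize { needs_flushing.signal if queue.size >= max_items }
    end

    # A consumer waits on the condition, then drains the queue ("retry_flush").
    consumer = Thread.new do
      mutex.synchronize { needs_flushing.wait(mutex) }
      queue.size.times { queue.pop }
    end

    push.call(:event1)
    push.call(:event2) # queue hits max_items => early signal (or the timer fires)
    consumer.join
    timer.kill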
@@ -78,7 +88,7 @@ describe "failures in bulk class expected behavior", :integration => true do
  subject.receive(event1)
  subject.receive(event2)
  subject.buffer_flush(:final => true)
- sleep(3)
+ sleep(10)
  end

  it "should retry actions with response status of 429" do
@@ -3,28 +3,6 @@ require "logstash/outputs/elasticsearch/http_client"
  require "java"

  describe LogStash::Outputs::Elasticsearch::HttpClient do
- context "successful" do
- it "should map correctly" do
- bulk_response = {"took"=>74, "errors"=>false, "items"=>[{"create"=>{"_index"=>"logstash-2014.11.17",
- "_type"=>"logs", "_id"=>"AUxTS2C55Jrgi-hC6rQF",
- "_version"=>1, "status"=>201}}]}
- actual = LogStash::Outputs::Elasticsearch::HttpClient.normalize_bulk_response(bulk_response)
- insist { actual } == {"errors"=> false}
- end
- end
-
- context "contains failures" do
- it "should map correctly" do
- item_response = {"_index"=>"logstash-2014.11.17",
- "_type"=>"logs", "_id"=>"AUxTQ_OI5Jrgi-hC6rQB", "status"=>400,
- "error"=>"MapperParsingException[failed to parse]..."}
- bulk_response = {"took"=>71, "errors"=>true,
- "items"=>[{"create"=>item_response}]}
- actual = LogStash::Outputs::Elasticsearch::HttpClient.normalize_bulk_response(bulk_response)
- insist { actual } == {"errors"=> true, "statuses"=> [400], "details" => [item_response]}
- end
- end
-
  describe "sniffing" do
  let(:base_options) { {:hosts => ["127.0.0.1"] }}
  let(:client) { LogStash::Outputs::Elasticsearch::HttpClient.new(base_options.merge(client_opts)) }
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-elasticsearch
  version: !ruby/object:Gem::Version
- version: 2.0.0.beta5
+ version: 2.0.0.beta6
  platform: java
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-09-08 00:00:00.000000000 Z
+ date: 2015-09-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 1.0.10
+ version: 1.0.13
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
@@ -40,7 +40,7 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 1.0.10
+ version: 1.0.13
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
@@ -174,15 +174,14 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - ".gitignore"
  - CHANGELOG.md
  - CONTRIBUTORS
  - Gemfile
  - LICENSE
  - NOTICE.TXT
  - README.md
- - Rakefile
  - lib/logstash/outputs/elasticsearch.rb
+ - lib/logstash/outputs/elasticsearch.rb.orig
  - lib/logstash/outputs/elasticsearch/elasticsearch-template.json
  - lib/logstash/outputs/elasticsearch/http_client.rb
  - logstash-output-elasticsearch.gemspec
@@ -237,3 +236,4 @@ test_files:
  - spec/unit/outputs/elasticsearch_proxy_spec.rb
  - spec/unit/outputs/elasticsearch_spec.rb
  - spec/unit/outputs/elasticsearch_ssl_spec.rb
+ has_rdoc:
data/.gitignore DELETED
@@ -1,6 +0,0 @@
- *.gem
- Gemfile.lock
- .bundle
- .idea
- *~
- .ruby-version
data/Rakefile DELETED
@@ -1 +0,0 @@
- require "logstash/devutils/rake"