logstash-output-elasticsearch_java 1.0.0.beta1

Files changed (43)
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/CHANGELOG.md +2 -0
  4. data/CONTRIBUTORS +31 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE +13 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +98 -0
  9. data/Rakefile +1 -0
  10. data/lib/logstash/outputs/elasticsearch_java/elasticsearch-template.json +41 -0
  11. data/lib/logstash/outputs/elasticsearch_java/protocol.rb +258 -0
  12. data/lib/logstash/outputs/elasticsearch_java.rb +545 -0
  13. data/lib/logstash-output-elasticsearch_java_jars.rb +5 -0
  14. data/logstash-output-elasticsearch_java.gemspec +32 -0
  15. data/spec/es_spec_helper.rb +81 -0
  16. data/spec/integration/outputs/elasticsearch/node_spec.rb +36 -0
  17. data/spec/integration/outputs/index_spec.rb +90 -0
  18. data/spec/integration/outputs/retry_spec.rb +148 -0
  19. data/spec/integration/outputs/routing_spec.rb +60 -0
  20. data/spec/integration/outputs/secure_spec.rb +113 -0
  21. data/spec/integration/outputs/templates_spec.rb +97 -0
  22. data/spec/integration/outputs/transport_create_spec.rb +94 -0
  23. data/spec/integration/outputs/update_spec.rb +88 -0
  24. data/spec/unit/outputs/elasticsearch/protocol_spec.rb +32 -0
  25. data/spec/unit/outputs/elasticsearch_spec.rb +79 -0
  26. data/vendor/jar-dependencies/runtime-jars/antlr-runtime-3.5.jar +0 -0
  27. data/vendor/jar-dependencies/runtime-jars/asm-4.1.jar +0 -0
  28. data/vendor/jar-dependencies/runtime-jars/asm-commons-4.1.jar +0 -0
  29. data/vendor/jar-dependencies/runtime-jars/elasticsearch-1.7.0.jar +0 -0
  30. data/vendor/jar-dependencies/runtime-jars/lucene-analyzers-common-4.10.4.jar +0 -0
  31. data/vendor/jar-dependencies/runtime-jars/lucene-core-4.10.4.jar +0 -0
  32. data/vendor/jar-dependencies/runtime-jars/lucene-grouping-4.10.4.jar +0 -0
  33. data/vendor/jar-dependencies/runtime-jars/lucene-highlighter-4.10.4.jar +0 -0
  34. data/vendor/jar-dependencies/runtime-jars/lucene-join-4.10.4.jar +0 -0
  35. data/vendor/jar-dependencies/runtime-jars/lucene-memory-4.10.4.jar +0 -0
  36. data/vendor/jar-dependencies/runtime-jars/lucene-misc-4.10.4.jar +0 -0
  37. data/vendor/jar-dependencies/runtime-jars/lucene-queries-4.10.4.jar +0 -0
  38. data/vendor/jar-dependencies/runtime-jars/lucene-queryparser-4.10.4.jar +0 -0
  39. data/vendor/jar-dependencies/runtime-jars/lucene-sandbox-4.10.4.jar +0 -0
  40. data/vendor/jar-dependencies/runtime-jars/lucene-spatial-4.10.4.jar +0 -0
  41. data/vendor/jar-dependencies/runtime-jars/lucene-suggest-4.10.4.jar +0 -0
  42. data/vendor/jar-dependencies/runtime-jars/spatial4j-0.4.1.jar +0 -0
  43. metadata +241 -0
data/lib/logstash/outputs/elasticsearch_java.rb
@@ -0,0 +1,545 @@
+ # encoding: utf-8
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/outputs/base"
+ require "logstash/json"
+ require "concurrent"
+ require "stud/buffer"
+ require "socket" # for Socket.gethostname
+ require "thread" # for safe queueing
+ require "uri" # for escaping user input
+ require "logstash/outputs/elasticsearch_java/protocol"
+
+ # This output lets you store logs in Elasticsearch using the native 'node' and 'transport'
+ # protocols. It is highly recommended that you use the regular 'logstash-output-elasticsearch'
+ # output instead, which uses HTTP. This output is, in fact, sometimes slower than, and never
+ # faster than, that one. Additionally, upgrading your Elasticsearch cluster may require you to
+ # simultaneously update this plugin for any protocol-level changes. The HTTP client may also be
+ # easier to work with due to wider familiarity with HTTP.
+ #
+ # *VERSION NOTE*: Your Elasticsearch cluster must be running Elasticsearch 1.0.0 or later.
+ #
+ # If you want to set other Elasticsearch options that are not exposed directly
+ # as configuration options, there are two methods:
+ #
+ # * Create an `elasticsearch.yml` file in the $PWD of the Logstash process
+ # * Pass in es.* Java properties (`java -Des.node.foo=` or `ruby -J-Des.node.foo=`)
+ #
+ # With the `protocol` setting set to "node", this plugin will join your
+ # Elasticsearch cluster as a client node, so it will show up in Elasticsearch's
+ # cluster status.
+ #
+ # You can learn more about Elasticsearch at <https://www.elastic.co/products/elasticsearch>
+ #
+ # ==== Operational Notes
+ #
+ # If using the `node` protocol, your firewalls might need to permit port 9300
+ # in *both* directions (from Logstash to Elasticsearch, and
+ # Elasticsearch to Logstash).
+ #
+ # ==== Retry Policy
+ #
+ # By default all bulk requests to ES are synchronous. Not all events in a bulk request
+ # always make it in successfully. For example, an event may not be formatted correctly
+ # for the index it targets (a type mismatch in the mapping). To minimize the loss of
+ # events, we have a specific retry policy in place. We retry all events which fail to reach
+ # Elasticsearch due to network-related issues. We retry specific events which exhibit errors
+ # under a separate policy described below. Events of this nature are ones which receive ES
+ # error codes described as retryable errors.
+ #
+ # *Retryable Errors:*
+ #
+ # - 429, Too Many Requests (RFC 6585)
+ # - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
+ #
+ # Here are the rules for what is retried, and when:
+ #
+ # - Block and retry all events in a bulk response that experience transient network exceptions until
+ #   a successful submission is received by Elasticsearch.
+ # - Retry the subset of sent events which resulted in ES errors of a retryable nature, which can be found
+ #   in RETRYABLE_CODES.
+ # - Events which returned retryable error codes are pushed onto a separate queue for
+ #   retrying. Events in this queue are retried a maximum of 3 times by default (configurable through :max_retries).
+ #   The size of this queue is capped by the value set in :retry_max_items.
+ # - Events from the retry queue are submitted again either when the queue reaches its max size or when
+ #   the max interval time is reached, which is set in :retry_max_interval.
+ # - Events which are not retryable or have reached their max retry count are logged to stderr.
+ class LogStash::Outputs::ElasticSearchJava < LogStash::Outputs::Base
+   attr_reader :client
+
+   include Stud::Buffer
+   RETRYABLE_CODES = [409, 429, 503]
+   SUCCESS_CODES = [200, 201]
+
+   config_name "elasticsearch_java"
+
+   # The index to write events to. This can be dynamic using the `%{foo}` syntax.
+   # The default value will partition your indices by day so you can more easily
+   # delete old data or only search specific date ranges.
+   # Indexes may not contain uppercase characters.
+   # For weekly indexes the ISO 8601 format is recommended, e.g. logstash-%{+xxxx.ww}
+   config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
+
+   # The index type to write events to. Generally you should try to write only
+   # similar events to the same 'type'. String expansion `%{foo}` works here.
+   #
+   # Deprecated in favor of the `document_type` setting.
+   config :index_type, :validate => :string, :deprecated => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
+
+   # The document type to write events to. Generally you should try to write only
+   # similar events to the same 'type'. String expansion `%{foo}` works here.
+   # Unless you set 'document_type', the event 'type' will be used if it exists;
+   # otherwise the document type will be assigned the value 'logs'.
+   config :document_type, :validate => :string
+
+   # Starting in Logstash 1.3 (unless you set option `manage_template` to false),
+   # a default mapping template for Elasticsearch will be applied if you do not
+   # already have one set to match the index pattern defined (default of
+   # `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
+   # the template will be applied to all indices starting with `logstash-*`.
+   #
+   # If you have dynamic templating (e.g. creating indices based on field names),
+   # then you should set `manage_template` to false and use the REST API to upload
+   # your templates manually.
+   config :manage_template, :validate => :boolean, :default => true
+
+   # This configuration option defines how the template is named inside Elasticsearch.
+   # Note that if you have used the template management features and subsequently
+   # change this, you will need to prune the old template manually, e.g.
+   #
+   # `curl -XDELETE http://localhost:9200/_template/OldTemplateName?pretty`
+   #
+   # where `OldTemplateName` is whatever the former setting was.
+   config :template_name, :validate => :string, :default => "logstash"
+
+   # You can set the path to your own template here, if you so desire.
+   # If not set, the included template will be used.
+   config :template, :validate => :path
+
+   # Overwrite the current template with whatever is configured
+   # in the `template` and `template_name` directives.
+   config :template_overwrite, :validate => :boolean, :default => false
+
+   # The document ID for the index. Useful for overwriting existing entries in
+   # Elasticsearch with the same ID.
+   config :document_id, :validate => :string
+
+   # A routing override to be applied to all processed events.
+   # This can be dynamic using the `%{foo}` syntax.
+   config :routing, :validate => :string
+
+   # The name of your cluster, if you set it on the Elasticsearch side. Useful
+   # for discovery when using the `node` or `transport` protocols.
+   # By default, it looks for a cluster named 'elasticsearch'.
+   # Equivalent to the Elasticsearch option 'cluster.name'.
+   config :cluster, :validate => :string
+
+   # For the `node` protocol, if you do not specify `hosts`, it will attempt to use
+   # multicast discovery to connect to Elasticsearch. If http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[multicast is disabled] in Elasticsearch,
+   # you must include the hostname or IP address of the host(s) to use for Elasticsearch unicast discovery.
+   # Remember that the `node` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (e.g. port 9300, not 9200), for example:
+   # `"127.0.0.1"`
+   # `["127.0.0.1:9300","127.0.0.2:9300"]`
+   # When setting hosts for the `node` protocol, it is important to confirm that at least one non-client
+   # node is listed in the `hosts` list. Also keep in mind that the `hosts` parameter, when used with
+   # the `node` protocol, is for *discovery purposes only* (not for load balancing). When multiple hosts
+   # are specified, it will contact the first host to see if it can use it to discover the cluster. If not,
+   # it will contact the second host in the list, and so forth. With the `node` protocol,
+   # Logstash will join the Elasticsearch cluster as a node client (which has a copy of the cluster
+   # state), and this node client is the one that will automatically handle the load balancing of requests
+   # across data nodes in the cluster.
+   # If you are looking for a high-availability setup, our recommendation is to use the `transport` protocol (below),
+   # set up multiple http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[client nodes], and list the client nodes in the `hosts` parameter.
+   #
+   # For the `transport` protocol, it will load balance requests across the hosts specified in the `hosts` parameter.
+   # Remember that the `transport` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (e.g. port 9300, not 9200), for example:
+   # `"127.0.0.1"`
+   # `["127.0.0.1:9300","127.0.0.2:9300"]`
+   # There is also a `sniffing` option (see below) that can be used with the transport protocol to instruct it to use the host to sniff for
+   # "alive" nodes in the cluster and automatically use them as the hosts list (skipping dedicated master nodes).
+   # If you do not use the sniffing option, it is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+   # to prevent Logstash from sending bulk requests to the master nodes. This parameter should therefore only reference data or client nodes.
+   #
+   # For the `http` protocol, it will load balance requests across the hosts specified in the `hosts` parameter.
+   # Remember that the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (e.g. port 9200, not 9300), for example:
+   # `"127.0.0.1"`
+   # `["127.0.0.1:9200","127.0.0.2:9200"]`
+   # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+   # to prevent LS from sending bulk requests to the master nodes. This parameter should therefore only reference data or client nodes.
+   config :hosts, :validate => :array, :default => ["127.0.0.1"]
+
+   # The port for the Elasticsearch transport to use.
+   #
+   # If you do not set this, the following defaults are used:
+   # * `protocol => transport` - port 9300-9305
+   # * `protocol => node` - port 9300-9305
+   config :port, :validate => :string, :default => "9300-9305"
+
+   # The name/address of the host to bind to for Elasticsearch clustering.
+   # Equivalent to the Elasticsearch option 'network.host'.
+   # This MUST be set for either protocol (node or transport) to work! The internal Elasticsearch node
+   # will bind to this IP. This IP MUST be reachable by all nodes in the Elasticsearch cluster.
+   config :network_host, :validate => :string
+
+   # This sets the local port to bind to. Equivalent to the Elasticsearch option 'transport.tcp.port'.
+   config :transport_tcp_port, :validate => :number
+
+   # This setting no longer does anything. It exists to keep config validation
+   # from failing. It will be removed in future versions.
+   config :max_inflight_requests, :validate => :number, :default => 50, :deprecated => true
+
+   # The node name Elasticsearch will use when joining a cluster.
+   #
+   # By default, this is generated internally by the ES client.
+   config :node_name, :validate => :string
+
+   # This plugin uses the bulk index API for improved indexing performance.
+   # To make efficient bulk API calls, we will buffer a certain number of
+   # events before flushing them out to Elasticsearch. This setting
+   # controls how many events will be buffered before sending a batch
+   # of events.
+   config :flush_size, :validate => :number, :default => 500
+
+   # The amount of time since the last flush before a flush is forced.
+   #
+   # This setting helps ensure slow event rates don't get stuck in Logstash.
+   # For example, if your `flush_size` is 100, and you have received 10 events,
+   # and it has been more than `idle_flush_time` seconds since the last flush,
+   # Logstash will flush those 10 events automatically.
+   #
+   # This helps keep both fast and slow log streams moving along in
+   # near-real-time.
+   config :idle_flush_time, :validate => :number, :default => 1
+
+   # Choose the protocol used to talk to Elasticsearch.
+   #
+   # The 'node' protocol will connect to the cluster as a normal Elasticsearch
+   # node (but will not store data). If you use the `node` protocol, you must permit
+   # bidirectional communication on port 9300 (or whichever port you have
+   # configured).
+   #
+   # If you do not specify the `hosts` parameter, it will use multicast for http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html[Elasticsearch discovery]. While this may work in a test/dev environment where multicast is enabled in
+   # Elasticsearch, we strongly recommend http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[disabling multicast]
+   # in Elasticsearch. To connect to an Elasticsearch cluster with multicast disabled,
+   # you must include the `hosts` parameter (see the relevant section above).
+   #
+   # The 'transport' protocol (the default) will connect to the host you specify and will
+   # not show up as a 'node' in the Elasticsearch cluster. This is useful
+   # in situations where you cannot permit connections outbound from the
+   # Elasticsearch cluster to this Logstash server.
+   #
+   # All protocols will use bulk requests when talking to Elasticsearch.
+   config :protocol, :validate => [ "node", "transport"], :default => "transport"
+
+   # The Elasticsearch action to perform. Valid actions are: `index`, `delete`,
+   # `create`, `update`, and `create_unless_exists`.
+   #
+   # Use of this setting *REQUIRES* you also configure the `document_id` setting
+   # because `delete` actions all require a document id.
+   #
+   # What does each action do?
+   #
+   # - index: indexes a document (an event from Logstash).
+   # - delete: deletes a document by id.
+   # - create: indexes a document; fails if a document with that id already exists in the index.
+   # - update: updates a document by id.
+   # - create_unless_exists: creates a document; fails if no id is provided.
+   #   (This action is not supported by the HTTP protocol.)
+   #
+   # For more details on actions, check out the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
+   config :action, :validate => :string, :default => "index"
+
+   # Validate the server's certificate.
+   # Disabling this severely compromises security.
+   # For more information, read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
+   config :ssl_certificate_verification, :validate => :boolean, :default => true
+
+   # The .cer or .pem file to validate the server's certificate.
+   config :cacert, :validate => :path
+
+   # The JKS truststore to validate the server's certificate.
+   # Use either `:truststore` or `:cacert`.
+   config :truststore, :validate => :path
+
+   # Set the truststore password.
+   config :truststore_password, :validate => :password
+
+   # The keystore used to present a certificate to the server.
+   # It can be either .jks or .p12.
+   config :keystore, :validate => :path
+
+   # Set the keystore password.
+   config :keystore_password, :validate => :password
+
+   # Enable cluster sniffing (transport only).
+   # Asks the host for the list of all cluster nodes and adds them to the hosts list.
+   # Equivalent to the Elasticsearch option 'client.transport.sniff'.
+   config :sniffing, :validate => :boolean, :default => false
+
+   # Set the maximum number of retries for each event.
+   config :max_retries, :validate => :number, :default => 3
+
+   # Set the maximum size of the retry queue for events that failed to send.
+   config :retry_max_items, :validate => :number, :default => 5000
+
+   # Set the maximum interval between bulk retries.
+   config :retry_max_interval, :validate => :number, :default => 5
+
+   # Enable doc_as_upsert for update mode.
+   # Creates a new document with the event source if document_id doesn't exist.
+   config :doc_as_upsert, :validate => :boolean, :default => false
+
+   # Set upsert content for update mode.
+   # Creates a new document with this parameter as its JSON string if document_id doesn't exist.
+   config :upsert, :validate => :string, :default => ""
+
+   public
+   def register
+     @submit_mutex = Mutex.new
+     # retry-specific variables
+     @retry_flush_mutex = Mutex.new
+     @retry_teardown_requested = Concurrent::AtomicBoolean.new(false)
+     # signaled when the retry queue needs flushing (e.g. when the retry interval elapses)
+     @retry_queue_needs_flushing = ConditionVariable.new
+     @retry_queue_not_full = ConditionVariable.new
+     @retry_queue = Queue.new
+
+     client_settings = {}
+     client_settings["cluster.name"] = @cluster if @cluster
+     client_settings["network.host"] = @network_host if @network_host
+     client_settings["transport.tcp.port"] = @transport_tcp_port if @transport_tcp_port
+     client_settings["client.transport.sniff"] = @sniffing
+
+     if @node_name
+       client_settings["node.name"] = @node_name
+     else
+       client_settings["node.name"] = "logstash-#{Socket.gethostname}-#{$$}-#{object_id}"
+     end
+
+     @@plugins.each do |plugin|
+       name = plugin.name.split('-')[-1]
+       client_settings.merge!(LogStash::Outputs::ElasticSearchJava.const_get(name.capitalize).create_client_config(self))
+     end
+
+     if (@hosts.nil? || @hosts.empty?) && @protocol != "node" # node can use zen discovery
+       @logger.info("No 'hosts' set in elasticsearch output. Defaulting to localhost")
+       @hosts = ["localhost"]
+     end
+
+     common_options = {
+       :protocol => @protocol,
+       :client_settings => client_settings,
+       :hosts => @hosts,
+       :port => @port
+     }
+
+     # Update API setup
+     update_options = {
+       :upsert => @upsert,
+       :doc_as_upsert => @doc_as_upsert
+     }
+     common_options.merge! update_options if @action == 'update'
+
+     client_class = case @protocol
+     when "transport"
+       LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::TransportClient
+     when "node"
+       LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient
+     end
+
+     @client = client_class.new(common_options)
+
+     if @manage_template
+       begin
+         @logger.info("Automatic template management enabled", :manage_template => @manage_template.to_s)
+         client.template_install(@template_name, get_template, @template_overwrite)
+       rescue => e
+         @logger.error("Failed to install template",
+                       :message => e.message,
+                       :error_class => e.class.name,
+                       :backtrace => e.backtrace)
+       end
+     end
+
+     @logger.info("New Elasticsearch output", :cluster => @cluster,
+                  :hosts => @hosts, :port => @port, :protocol => @protocol)
+
+     buffer_initialize(
+       :max_items => @flush_size,
+       :max_interval => @idle_flush_time,
+       :logger => @logger
+     )
+
+     @retry_timer_thread = Thread.new do
+       loop do
+         sleep(@retry_max_interval)
+         @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
+       end
+     end
+
+     @retry_thread = Thread.new do
+       while @retry_teardown_requested.false?
+         @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.wait(@retry_flush_mutex) }
+         retry_flush
+       end
+     end
+   end # def register
+
+   public
+   def get_template
+     if @template.nil?
+       @template = ::File.expand_path('elasticsearch_java/elasticsearch-template.json', ::File.dirname(__FILE__))
+       if !File.exists?(@template)
+         raise "You must specify 'template => ...' in your elasticsearch output (I looked for '#{@template}')"
+       end
+     end
+     template_json = IO.read(@template).gsub(/\n/,'')
+     template = LogStash::Json.load(template_json)
+     @logger.info("Using mapping template", :template => template)
+     return template
+   end # def get_template
+
+   public
+   def receive(event)
+     return unless output?(event)
+
+     # Block until we have not maxed out our
+     # retry queue. This applies back-pressure
+     # to slow down the receive rate.
+     @retry_flush_mutex.synchronize {
+       @retry_queue_not_full.wait(@retry_flush_mutex) while @retry_queue.size > @retry_max_items
+     }
+
+     event['@metadata']['retry_count'] = 0
+
+     # Set the 'type' value for the index.
+     type = if @document_type
+              event.sprintf(@document_type)
+            elsif @index_type # deprecated
+              event.sprintf(@index_type)
+            else
+              event["type"] || "logs"
+            end
+
+     params = {
+       :_id => @document_id ? event.sprintf(@document_id) : nil,
+       :_index => event.sprintf(@index),
+       :_type => type,
+       :_routing => @routing ? event.sprintf(@routing) : nil
+     }
+
+     params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @action == 'update' && @upsert != ""
+
+     buffer_receive([event.sprintf(@action), params, event])
+   end # def receive
+
+   public
+   # The submit method can be called from both the
+   # Stud::Buffer flush thread and from our own retry thread.
+   def submit(actions)
+     es_actions = actions.map { |a, doc, event| [a, doc, event.to_hash] }
+     @submit_mutex.lock
+     begin
+       bulk_response = client.bulk(es_actions)
+     ensure
+       @submit_mutex.unlock
+     end
+     if bulk_response["errors"]
+       actions_with_responses = actions.zip(bulk_response['statuses'])
+       actions_to_retry = []
+       actions_with_responses.each do |action, resp_code|
+         if RETRYABLE_CODES.include?(resp_code)
+           @logger.warn "retrying failed action with response code: #{resp_code}"
+           actions_to_retry << action
+         elsif not SUCCESS_CODES.include?(resp_code)
+           @logger.warn "failed action with response of #{resp_code}, dropping action: #{action}"
+         end
+       end
+       retry_push(actions_to_retry) unless actions_to_retry.empty?
+     end
+   end
+
+   # When exceptions are raised upon submission, we raise an exception so that
+   # Stud::Buffer will retry the flush.
+   public
+   def flush(actions, teardown = false)
+     begin
+       submit(actions)
+     rescue => e
+       @logger.error "Error sending bulk of actions: #{e.message}"
+       raise e
+     end
+   end # def flush
+
+   public
+   def teardown
+     if @cacert # remove the temporary jks store created from the cacert
+       File.delete(@truststore)
+     end
+
+     @retry_teardown_requested.make_true
+     # First, make sure retry_timer_thread is stopped
+     # to ensure we do not signal a retry based on
+     # the retry interval.
+     Thread.kill(@retry_timer_thread)
+     @retry_timer_thread.join
+     # Signal flushing in the case that #retry_flush is in
+     # the process of waiting for a signal.
+     @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
+     # Now #retry_flush is guaranteed not to be in a waiting state
+     # and can be safely joined into the main thread
+     # for final execution of any in-process remaining call.
+     @retry_thread.join
+
+     # Execute any final actions along with a subsequent retry for any
+     # final actions that did not succeed.
+     buffer_flush(:final => true)
+     retry_flush
+   end
+
+   private
+   # In charge of submitting any actions in @retry_queue that need to be
+   # retried.
+   #
+   # This method is not called concurrently. It is only called by @retry_thread,
+   # and once that thread has ended during the teardown process, a final call
+   # to this method is made upon teardown in the main thread.
+   def retry_flush()
+     unless @retry_queue.empty?
+       buffer = @retry_queue.size.times.map do
+         next_action, next_doc, next_event = @retry_queue.pop
+         next_event['@metadata']['retry_count'] += 1
+
+         if next_event['@metadata']['retry_count'] > @max_retries
+           @logger.error "too many attempts at sending event. dropping: #{next_event}"
+           nil
+         else
+           [next_action, next_doc, next_event]
+         end
+       end.compact
+
+       submit(buffer) unless buffer.empty?
+     end
+
+     @retry_flush_mutex.synchronize {
+       @retry_queue_not_full.signal if @retry_queue.size < @retry_max_items
+     }
+   end
+
+   private
+   def retry_push(actions)
+     Array(actions).each{|action| @retry_queue << action}
+     @retry_flush_mutex.synchronize {
+       @retry_queue_needs_flushing.signal if @retry_queue.size >= @retry_max_items
+     }
+   end
+
+   @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-elasticsearch_java-/ }
+
+   @@plugins.each do |plugin|
+     name = plugin.name.split('-')[-1]
+     require "logstash/outputs/elasticsearch_java/#{name}"
+   end
+
+ end # class LogStash::Outputs::ElasticSearchJava
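
The configuration options documented above compose into a fairly small pipeline definition. Below is a minimal, hypothetical Logstash config for this output over the `transport` protocol — the host names, cluster name, and bind address are illustrative placeholders, not values taken from this gem — with the buffering and retry knobs spelled out at their defaults:

output {
  elasticsearch_java {
    protocol           => "transport"
    hosts              => ["es-data-1:9300", "es-data-2:9300"]  # transport port, not 9200
    cluster            => "my-cluster"                          # must match cluster.name on the ES side
    network_host       => "10.0.0.5"                            # required; must be reachable by the ES cluster
    index              => "logstash-%{+YYYY.MM.dd}"
    flush_size         => 500     # events buffered per bulk request
    idle_flush_time    => 1       # seconds before a partial buffer is flushed
    max_retries        => 3       # per-event cap for retryable (409/429/503) failures
    retry_max_items    => 5000    # retry queue size cap
    retry_max_interval => 5       # seconds between retry-queue flushes
  }
}

Note that `hosts` points at the transport port (9300) and that, per the operational notes, `network_host` must be an address the Elasticsearch nodes can reach back to.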
data/lib/logstash-output-elasticsearch_java_jars.rb
@@ -0,0 +1,5 @@
+ # encoding: utf-8
+ require 'logstash/environment'
+
+ # Put the vendored runtime jars (vendor/jar-dependencies/runtime-jars) on the JRuby classpath.
+ root_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
+ LogStash::Environment.load_runtime_jars! File.join(root_dir, "vendor")
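
For context, this loader is what makes the vendored Elasticsearch and Lucene jars (listed in the file table above) visible to JRuby. A minimal sketch of how a caller would lean on it — the `java_import` target below is an illustrative ES 1.x class, not something this file does itself:

# Sketch: require the loader before referencing any org.elasticsearch classes.
require "logstash-output-elasticsearch_java_jars"

# Once the jars are loaded, vendored classes can be imported under JRuby:
java_import "org.elasticsearch.node.NodeBuilder" # illustrative ES 1.x entry point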
data/logstash-output-elasticsearch_java.gemspec
@@ -0,0 +1,32 @@
+ Gem::Specification.new do |s|
+   s.name = 'logstash-output-elasticsearch_java'
+   s.version = '1.0.0.beta1'
+   s.licenses = ['apache-2.0']
+   s.summary = "Logstash Output to Elasticsearch using Java node/transport client"
+   s.description = "Output events to elasticsearch using the java client"
+   s.authors = ["Elastic"]
+   s.email = 'info@elastic.co'
+   s.homepage = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\)
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'concurrent-ruby'
+   s.add_runtime_dependency 'elasticsearch', ['>= 1.0.10', '~> 1.0']
+   s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
+   s.add_runtime_dependency 'cabin', ['~> 0.6']
+   s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0'
+
+   s.add_development_dependency 'ftw', '~> 0.0.42'
+   s.add_development_dependency 'logstash-input-generator'
+   s.add_development_dependency 'logstash-devutils'
+   s.add_development_dependency 'longshoreman'
+ end
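
As a usage aside, the gemspec can be consumed with the stock RubyGems API. A minimal sketch, assuming it is run from a git checkout of the gem (required because `s.files` shells out to `git ls-files`):

# Sketch: build the .gem artifact programmatically (equivalent to `gem build`).
require "rubygems/package"

spec = Gem::Specification.load("logstash-output-elasticsearch_java.gemspec")
Gem::Package.build(spec)
# => writes logstash-output-elasticsearch_java-1.0.0.beta1.gem to the current directory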
data/spec/es_spec_helper.rb
@@ -0,0 +1,81 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require "ftw"
+ require "logstash/plugin"
+ require "logstash/json"
+ require "stud/try"
+ require "longshoreman"
+ require "logstash/outputs/elasticsearch_java"
+ require "logstash/outputs/elasticsearch_java/protocol"
+
+ CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999)}"
+ CONTAINER_IMAGE = "elasticsearch"
+ CONTAINER_TAG = "1.6"
+
+ DOCKER_INTEGRATION = ENV["DOCKER_INTEGRATION"]
+
+ module ESHelper
+   def get_local_host
+     "127.0.0.1"
+   end
+
+   def get_host
+     DOCKER_INTEGRATION ? Longshoreman.new.get_host_ip : "127.0.0.1"
+   end
+
+   def get_port(protocol)
+     unless DOCKER_INTEGRATION
+       return protocol.to_sym == :http ? 9200 : 9300
+     end
+
+     container = Longshoreman::Container.new
+     container.get(CONTAINER_NAME)
+     container.rport(9300)
+   end
+
+   def get_client
+     Elasticsearch::Client.new(:hosts => "#{get_host}:#{get_port('http')}")
+   end
+ end
+
+ RSpec.configure do |config|
+   config.include ESHelper
+
+   if DOCKER_INTEGRATION
+     # This :all hook gets run before every describe block that is tagged with :integration => true.
+     config.before(:all, :integration => true) do
+       # Check whether the container already exists before creating a new one.
+       begin
+         ls = Longshoreman.new
+         ls.container.get(CONTAINER_NAME)
+       rescue Docker::Error::NotFoundError
+         Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME)
+         # TODO(talevy): verify ES is running instead of static timeout
+         sleep 10
+       end
+     end
+
+     # We want to do a final cleanup after all :integration runs,
+     # but we don't want to clean up before the last block.
+     # This is a final blind check to see if the ES docker container is running and
+     # needs to be cleaned up. If no container can be found and/or docker is not
+     # running on the system, we do nothing.
+     config.after(:suite) do
+       # Only clean up the docker container if the system has docker and the container is running.
+       begin
+         ls = Longshoreman.new
+         ls.container.get(CONTAINER_NAME)
+         ls.cleanup
+       rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
+         # do nothing
+       end
+     end
+   end
+
+   config.after(:each) do
+     LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient.clear_client
+   end
+ end
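
To show how these helpers are meant to be used together, here is a hypothetical integration spec — not part of this gem's spec suite — that leans on `get_host`, `get_port`, `get_local_host`, and `get_client`; the index name and event content are illustrative:

require_relative "es_spec_helper"

describe "indexing one event", :integration => true do
  it "stores the event in the target index" do
    # Build the output the same way the bundled integration specs would.
    output = LogStash::Plugin.lookup("output", "elasticsearch_java").new(
      "hosts"        => ["#{get_host}:#{get_port('transport')}"],
      "protocol"     => "transport",
      "network_host" => get_local_host,
      "index"        => "test-index"
    )
    output.register
    output.receive(LogStash::Event.new("message" => "hello"))
    output.buffer_flush(:final => true) # Stud::Buffer helper; forces the bulk submit

    client = get_client
    client.indices.refresh(:index => "test-index")
    expect(client.count(:index => "test-index")["count"]).to eq(1)
  end
end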