logstash-output-elasticsearch_java 1.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +31 -0
- data/Gemfile +3 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +98 -0
- data/Rakefile +1 -0
- data/lib/logstash/outputs/elasticsearch_java/elasticsearch-template.json +41 -0
- data/lib/logstash/outputs/elasticsearch_java/protocol.rb +258 -0
- data/lib/logstash/outputs/elasticsearch_java.rb +545 -0
- data/lib/logstash-output-elasticsearch_java_jars.rb +5 -0
- data/logstash-output-elasticsearch_java.gemspec +32 -0
- data/spec/es_spec_helper.rb +81 -0
- data/spec/integration/outputs/elasticsearch/node_spec.rb +36 -0
- data/spec/integration/outputs/index_spec.rb +90 -0
- data/spec/integration/outputs/retry_spec.rb +148 -0
- data/spec/integration/outputs/routing_spec.rb +60 -0
- data/spec/integration/outputs/secure_spec.rb +113 -0
- data/spec/integration/outputs/templates_spec.rb +97 -0
- data/spec/integration/outputs/transport_create_spec.rb +94 -0
- data/spec/integration/outputs/update_spec.rb +88 -0
- data/spec/unit/outputs/elasticsearch/protocol_spec.rb +32 -0
- data/spec/unit/outputs/elasticsearch_spec.rb +79 -0
- data/vendor/jar-dependencies/runtime-jars/antlr-runtime-3.5.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/asm-4.1.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/asm-commons-4.1.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/elasticsearch-1.7.0.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-analyzers-common-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-core-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-grouping-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-highlighter-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-join-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-memory-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-misc-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-queries-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-queryparser-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-sandbox-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-spatial-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-suggest-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/spatial4j-0.4.1.jar +0 -0
- metadata +241 -0
@@ -0,0 +1,545 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/namespace"
|
3
|
+
require "logstash/environment"
|
4
|
+
require "logstash/outputs/base"
|
5
|
+
require "logstash/json"
|
6
|
+
require "concurrent"
|
7
|
+
require "stud/buffer"
|
8
|
+
require "socket" # for Socket.gethostname
|
9
|
+
require "thread" # for safe queueing
|
10
|
+
require "uri" # for escaping user input
|
11
|
+
require "logstash/outputs/elasticsearch_java/protocol"
|
12
|
+
|
13
|
+
# This output lets you store logs in Elasticsearch using the native 'node' and 'transport'
|
14
|
+
# protocols. It is highly recommended to use the regular 'logstash-output-elasticsearch' output
|
15
|
+
# which uses HTTP instead. This output is, in-fact, sometimes slower, and never faster than that one.
|
16
|
+
# Additionally, upgrading your Elasticsearch cluster may require you to simultaneously update this
|
17
|
+
# plugin for any protocol level changes. The HTTP client may be easier to work with due to wider
|
18
|
+
# familiarity with HTTP.
|
19
|
+
#
|
20
|
+
# *VERSION NOTE*: Your Elasticsearch cluster must be running Elasticsearch 1.0.0 or later.
|
21
|
+
#
|
22
|
+
# If you want to set other Elasticsearch options that are not exposed directly
|
23
|
+
# as configuration options, there are two methods:
|
24
|
+
#
|
25
|
+
# * Create an `elasticsearch.yml` file in the $PWD of the Logstash process
|
26
|
+
# * Pass in es.* java properties (`java -Des.node.foo=` or `ruby -J-Des.node.foo=`)
|
27
|
+
#
|
28
|
+
# With the default `protocol` setting ("node"), this plugin will join your
|
29
|
+
# Elasticsearch cluster as a client node, so it will show up in Elasticsearch's
|
30
|
+
# cluster status.
|
31
|
+
#
|
32
|
+
# You can learn more about Elasticsearch at <https://www.elastic.co/products/elasticsearch>
|
33
|
+
#
|
34
|
+
# ==== Operational Notes
|
35
|
+
#
|
36
|
+
# If using the default `protocol` setting ("node"), your firewalls might need
|
37
|
+
# to permit port 9300 in *both* directions (from Logstash to Elasticsearch, and
|
38
|
+
# Elasticsearch to Logstash)
|
39
|
+
#
|
40
|
+
# ==== Retry Policy
|
41
|
+
#
|
42
|
+
# By default all bulk requests to ES are synchronous. Not all events in the bulk requests
|
43
|
+
# always make it successfully. For example, there could be events which are not formatted
|
44
|
+
# correctly for the index they are targeting (type mismatch in mapping). So that we minimize loss of
|
45
|
+
# events, we have a specific retry policy in place. We retry all events which fail to be reached by
|
46
|
+
# Elasticsearch for network related issues. We retry specific events which exhibit errors under a separate
|
47
|
+
# policy described below. Events of this nature are ones which experience ES error codes described as
|
48
|
+
# retryable errors.
|
49
|
+
#
|
50
|
+
# *Retryable Errors:*
|
51
|
+
#
|
52
|
+
# - 429, Too Many Requests (RFC6585)
|
53
|
+
# - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
|
54
|
+
#
|
55
|
+
# Here are the rules of what is retried when:
|
56
|
+
#
|
57
|
+
# - Block and retry all events in bulk response that experiences transient network exceptions until
|
58
|
+
# a successful submission is received by Elasticsearch.
|
59
|
+
# - Retry subset of sent events which resulted in ES errors of a retryable nature which can be found
|
60
|
+
# in RETRYABLE_CODES
|
61
|
+
# - For events which returned retryable error codes, they will be pushed onto a separate queue for
|
62
|
+
# retrying events. events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries). The size of
|
63
|
+
# this queue is capped by the value set in :retry_max_items.
|
64
|
+
# - Events from the retry queue are submitted again either when the queue reaches its max size or when
|
65
|
+
# the max interval time is reached, which is set in :retry_max_interval.
|
66
|
+
# - Events which are not retryable or have reached their max retry count are logged to stderr.
|
67
|
+
class LogStash::Outputs::ElasticSearchJava < LogStash::Outputs::Base
|
68
|
+
attr_reader :client
|
69
|
+
|
70
|
+
include Stud::Buffer
|
71
|
+
RETRYABLE_CODES = [409, 429, 503]
|
72
|
+
SUCCESS_CODES = [200, 201]
|
73
|
+
|
74
|
+
config_name "elasticsearch_java"
|
75
|
+
|
76
|
+
# The index to write events to. This can be dynamic using the `%{foo}` syntax.
|
77
|
+
# The default value will partition your indices by day so you can more easily
|
78
|
+
# delete old data or only search specific date ranges.
|
79
|
+
# Indexes may not contain uppercase characters.
|
80
|
+
# For weekly indexes ISO 8601 format is recommended, eg. logstash-%{+xxxx.ww}
|
81
|
+
config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
|
82
|
+
|
83
|
+
# The index type to write events to. Generally you should try to write only
|
84
|
+
# similar events to the same 'type'. String expansion `%{foo}` works here.
|
85
|
+
#
|
86
|
+
# Deprecated in favor of `document_type` field.
|
87
|
+
config :index_type, :validate => :string, :deprecated => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
|
88
|
+
|
89
|
+
# The document type to write events to. Generally you should try to write only
|
90
|
+
# similar events to the same 'type'. String expansion `%{foo}` works here.
|
91
|
+
# Unless you set 'document_type', the event 'type' will be used if it exists
|
92
|
+
# otherwise the document type will be assigned the value of 'logs'
|
93
|
+
config :document_type, :validate => :string
|
94
|
+
|
95
|
+
# Starting in Logstash 1.3 (unless you set option `manage_template` to false)
|
96
|
+
# a default mapping template for Elasticsearch will be applied, if you do not
|
97
|
+
# already have one set to match the index pattern defined (default of
|
98
|
+
# `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
|
99
|
+
# the template will be applied to all indices starting with `logstash-*`
|
100
|
+
#
|
101
|
+
# If you have dynamic templating (e.g. creating indices based on field names)
|
102
|
+
# then you should set `manage_template` to false and use the REST API to upload
|
103
|
+
# your templates manually.
|
104
|
+
config :manage_template, :validate => :boolean, :default => true
|
105
|
+
|
106
|
+
# This configuration option defines how the template is named inside Elasticsearch.
|
107
|
+
# Note that if you have used the template management features and subsequently
|
108
|
+
# change this, you will need to prune the old template manually, e.g.
|
109
|
+
#
|
110
|
+
# `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
|
111
|
+
#
|
112
|
+
# where `OldTemplateName` is whatever the former setting was.
|
113
|
+
config :template_name, :validate => :string, :default => "logstash"
|
114
|
+
|
115
|
+
# You can set the path to your own template here, if you so desire.
|
116
|
+
# If not set, the included template will be used.
|
117
|
+
config :template, :validate => :path
|
118
|
+
|
119
|
+
# Overwrite the current template with whatever is configured
|
120
|
+
# in the `template` and `template_name` directives.
|
121
|
+
config :template_overwrite, :validate => :boolean, :default => false
|
122
|
+
|
123
|
+
# The document ID for the index. Useful for overwriting existing entries in
|
124
|
+
# Elasticsearch with the same ID.
|
125
|
+
config :document_id, :validate => :string
|
126
|
+
|
127
|
+
# A routing override to be applied to all processed events.
|
128
|
+
# This can be dynamic using the `%{foo}` syntax.
|
129
|
+
config :routing, :validate => :string
|
130
|
+
|
131
|
+
# The name of your cluster if you set it on the Elasticsearch side. Useful
|
132
|
+
# for discovery when using `node` or `transport` protocols.
|
133
|
+
# By default, it looks for a cluster named 'elasticsearch'.
|
134
|
+
# Equivalent to the Elasticsearch option 'cluster.name'
|
135
|
+
config :cluster, :validate => :string
|
136
|
+
|
137
|
+
# For the `node` protocol, if you do not specify `host`, it will attempt to use
|
138
|
+
# multicast discovery to connect to Elasticsearch. If http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[multicast is disabled] in Elasticsearch,
|
139
|
+
# you must include the hostname or IP address of the host(s) to use for Elasticsearch unicast discovery.
|
140
|
+
# Remember the `node` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (eg. 9300, not 9200).
|
141
|
+
# `"127.0.0.1"`
|
142
|
+
# `["127.0.0.1:9300","127.0.0.2:9300"]`
|
143
|
+
# When setting hosts for `node` protocol, it is important to confirm that at least one non-client
|
144
|
+
# node is listed in the `host` list. Also keep in mind that the `host` parameter when used with
|
145
|
+
# the `node` protocol is for *discovery purposes only* (not for load balancing). When multiple hosts
|
146
|
+
# are specified, it will contact the first host to see if it can use it to discover the cluster. If not,
|
147
|
+
# then it will contact the second host in the list and so forth. With the `node` protocol,
|
148
|
+
# Logstash will join the Elasticsearch cluster as a node client (which has a copy of the cluster
|
149
|
+
# state) and this node client is the one that will automatically handle the load balancing of requests
|
150
|
+
# across data nodes in the cluster.
|
151
|
+
# If you are looking for a high availability setup, our recommendation is to use the `transport` protocol (below),
|
152
|
+
# set up multiple http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[client nodes] and list the client nodes in the `host` parameter.
|
153
|
+
#
|
154
|
+
# For the `transport` protocol, it will load balance requests across the hosts specified in the `host` parameter.
|
155
|
+
# Remember the `transport` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (eg. 9300, not 9200).
|
156
|
+
# `"127.0.0.1"`
|
157
|
+
# `["127.0.0.1:9300","127.0.0.2:9300"]`
|
158
|
+
# There is also a `sniffing` option (see below) that can be used with the transport protocol to instruct it to use the host to sniff for
|
159
|
+
# "alive" nodes in the cluster and automatically use it as the hosts list (but will skip the dedicated master nodes).
|
160
|
+
# If you do not use the sniffing option, it is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `host` list
|
161
|
+
# to prevent Logstash from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes.
|
162
|
+
#
|
163
|
+
# For the `http` protocol, it will load balance requests across the hosts specified in the `host` parameter.
|
164
|
+
# Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
|
165
|
+
# `"127.0.0.1"`
|
166
|
+
# `["127.0.0.1:9200","127.0.0.2:9200"]`
|
167
|
+
# It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `host` list
|
168
|
+
# to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes.
|
169
|
+
config :hosts, :validate => :array, :default => ["127.0.0.1"]
|
170
|
+
|
171
|
+
# The port for Elasticsearch transport to use.
|
172
|
+
#
|
173
|
+
# If you do not set this, the following defaults are used:
|
174
|
+
# * `protocol => transport` - port 9300-9305
|
175
|
+
# * `protocol => node` - port 9300-9305
|
176
|
+
config :port, :validate => :string, :default => "9300-9305"
|
177
|
+
|
178
|
+
# The name/address of the host to bind to for Elasticsearch clustering. Equivalent to the Elasticsearch option 'network.host'
|
179
|
+
# option.
|
180
|
+
# This MUST be set for either protocol to work (node or transport)! The internal Elasticsearch node
|
181
|
+
# will bind to this ip. This ip MUST be reachable by all nodes in the Elasticsearch cluster
|
182
|
+
config :network_host, :validate => :string
|
183
|
+
|
184
|
+
# This sets the local port to bind to. Equivalent to the Elasticsrearch option 'transport.tcp.port'
|
185
|
+
config :transport_tcp_port, :validate => :number
|
186
|
+
|
187
|
+
# This setting no longer does anything. It exists to keep config validation
|
188
|
+
# from failing. It will be removed in future versions.
|
189
|
+
config :max_inflight_requests, :validate => :number, :default => 50, :deprecated => true
|
190
|
+
|
191
|
+
# The node name Elasticsearch will use when joining a cluster.
|
192
|
+
#
|
193
|
+
# By default, this is generated internally by the ES client.
|
194
|
+
config :node_name, :validate => :string
|
195
|
+
|
196
|
+
# This plugin uses the bulk index api for improved indexing performance.
|
197
|
+
# To make efficient bulk api calls, we will buffer a certain number of
|
198
|
+
# events before flushing that out to Elasticsearch. This setting
|
199
|
+
# controls how many events will be buffered before sending a batch
|
200
|
+
# of events.
|
201
|
+
config :flush_size, :validate => :number, :default => 500
|
202
|
+
|
203
|
+
# The amount of time since last flush before a flush is forced.
|
204
|
+
#
|
205
|
+
# This setting helps ensure slow event rates don't get stuck in Logstash.
|
206
|
+
# For example, if your `flush_size` is 100, and you have received 10 events,
|
207
|
+
# and it has been more than `idle_flush_time` seconds since the last flush,
|
208
|
+
# Logstash will flush those 10 events automatically.
|
209
|
+
#
|
210
|
+
# This helps keep both fast and slow log streams moving along in
|
211
|
+
# near-real-time.
|
212
|
+
config :idle_flush_time, :validate => :number, :default => 1
|
213
|
+
|
214
|
+
# Choose the protocol used to talk to Elasticsearch.
|
215
|
+
#
|
216
|
+
# The 'node' protocol (default) will connect to the cluster as a normal Elasticsearch
|
217
|
+
# node (but will not store data). If you use the `node` protocol, you must permit
|
218
|
+
# bidirectional communication on the port 9300 (or whichever port you have
|
219
|
+
# configured).
|
220
|
+
#
|
221
|
+
# If you do not specify the `host` parameter, it will use multicast for http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html[Elasticsearch discovery]. While this may work in a test/dev environment where multicast is enabled in
|
222
|
+
# Elasticsearch, we strongly recommend http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[disabling multicast]
|
223
|
+
# in Elasticsearch. To connect to an Elasticsearch cluster with multicast disabled,
|
224
|
+
# you must include the `host` parameter (see relevant section above).
|
225
|
+
#
|
226
|
+
# The 'transport' protocol will connect to the host you specify and will
|
227
|
+
# not show up as a 'node' in the Elasticsearch cluster. This is useful
|
228
|
+
# in situations where you cannot permit connections outbound from the
|
229
|
+
# Elasticsearch cluster to this Logstash server.
|
230
|
+
#
|
231
|
+
# All protocols will use bulk requests when talking to Elasticsearch.
|
232
|
+
config :protocol, :validate => [ "node", "transport"], :default => "transport"
|
233
|
+
|
234
|
+
# The Elasticsearch action to perform. Valid actions are: `index`, `delete`.
|
235
|
+
#
|
236
|
+
# Use of this setting *REQUIRES* you also configure the `document_id` setting
|
237
|
+
# because `delete` actions all require a document id.
|
238
|
+
#
|
239
|
+
# What does each action do?
|
240
|
+
#
|
241
|
+
# - index: indexes a document (an event from Logstash).
|
242
|
+
# - delete: deletes a document by id
|
243
|
+
# - create: indexes a document, fails if a document by that id already exists in the index.
|
244
|
+
# - update: updates a document by id
|
245
|
+
# following action is not supported by HTTP protocol
|
246
|
+
# - create_unless_exists: creates a document, fails if no id is provided
|
247
|
+
#
|
248
|
+
# For more details on actions, check out the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
|
249
|
+
config :action, :validate => :string, :default => "index"
|
250
|
+
|
251
|
+
# Validate the server's certificate
|
252
|
+
# Disabling this severely compromises security
|
253
|
+
# For more information read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
|
254
|
+
config :ssl_certificate_verification, :validate => :boolean, :default => true
|
255
|
+
|
256
|
+
# The .cer or .pem file to validate the server's certificate
|
257
|
+
config :cacert, :validate => :path
|
258
|
+
|
259
|
+
# The JKS truststore to validate the server's certificate
|
260
|
+
# Use either `:truststore` or `:cacert`
|
261
|
+
config :truststore, :validate => :path
|
262
|
+
|
263
|
+
# Set the truststore password
|
264
|
+
config :truststore_password, :validate => :password
|
265
|
+
|
266
|
+
# The keystore used to present a certificate to the server
|
267
|
+
# It can be either .jks or .p12
|
268
|
+
config :keystore, :validate => :path
|
269
|
+
|
270
|
+
# Set the truststore password
|
271
|
+
config :keystore_password, :validate => :password
|
272
|
+
|
273
|
+
# Enable cluster sniffing (transport only).
|
274
|
+
# Asks host for the list of all cluster nodes and adds them to the hosts list
|
275
|
+
# Equivalent to the Elasticsearch option 'client.transport.sniff'
|
276
|
+
config :sniffing, :validate => :boolean, :default => false
|
277
|
+
|
278
|
+
# Set max retry for each event
|
279
|
+
config :max_retries, :validate => :number, :default => 3
|
280
|
+
|
281
|
+
# Set retry policy for events that failed to send
|
282
|
+
config :retry_max_items, :validate => :number, :default => 5000
|
283
|
+
|
284
|
+
# Set max interval between bulk retries
|
285
|
+
config :retry_max_interval, :validate => :number, :default => 5
|
286
|
+
|
287
|
+
# Enable doc_as_upsert for update mode
|
288
|
+
# create a new document with source if document_id doesn't exists
|
289
|
+
config :doc_as_upsert, :validate => :boolean, :default => false
|
290
|
+
|
291
|
+
# Set upsert content for update mode
|
292
|
+
# create a new document with this parameter as json string if document_id doesn't exists
|
293
|
+
config :upsert, :validate => :string, :default => ""
|
294
|
+
|
295
|
+
public
|
296
|
+
# Plugin initialization: sets up the submit/retry mutexes and queue, builds
# the native-client settings hash, instantiates the node or transport client,
# optionally installs the index template, initializes the bulk buffer, and
# starts the retry timer/worker threads.
def register
  @submit_mutex = Mutex.new
  # retry-specific variables
  @retry_flush_mutex = Mutex.new
  @retry_teardown_requested = Concurrent::AtomicBoolean.new(false)
  # needs flushing when interval
  @retry_queue_needs_flushing = ConditionVariable.new
  @retry_queue_not_full = ConditionVariable.new
  @retry_queue = Queue.new

  client_settings = {}
  client_settings["cluster.name"] = @cluster if @cluster
  client_settings["network.host"] = @network_host if @network_host
  client_settings["transport.tcp.port"] = @transport_tcp_port if @transport_tcp_port
  client_settings["client.transport.sniff"] = @sniffing

  if @node_name
    client_settings["node.name"] = @node_name
  else
    # Unique default node name: host + pid + object id avoids collisions when
    # several pipelines/outputs join the same cluster.
    client_settings["node.name"] = "logstash-#{Socket.gethostname}-#{$$}-#{object_id}"
  end

  # Let companion plugin gems (discovered at class-load time) contribute
  # extra client settings, e.g. security/shield add-ons.
  @@plugins.each do |plugin|
    name = plugin.name.split('-')[-1]
    client_settings.merge!(LogStash::Outputs::ElasticSearchJava.const_get(name.capitalize).create_client_config(self))
  end

  if (@hosts.nil? || @hosts.empty?) && @protocol != "node" # node can use zen discovery
    @logger.info("No 'hosts' set in elasticsearch output. Defaulting to localhost")
    @hosts = ["localhost"]
  end

  common_options = {
    :protocol => @protocol,
    :client_settings => client_settings,
    :hosts => @hosts,
    :port => @port
  }

  # Update API setup
  update_options = {
    :upsert => @upsert,
    :doc_as_upsert => @doc_as_upsert
  }
  common_options.merge! update_options if @action == 'update'

  client_class = case @protocol
    when "transport"
      LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::TransportClient
    when "node"
      LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient
  end

  @client = client_class.new(common_options)

  if @manage_template
    begin
      @logger.info("Automatic template management enabled", :manage_template => @manage_template.to_s)
      client.template_install(@template_name, get_template, @template_overwrite)
    rescue => e
      # Template installation failure is logged but non-fatal: indexing can
      # still proceed with whatever template ES already has.
      @logger.error("Failed to install template",
                    :message => e.message,
                    :error_class => e.class.name,
                    :backtrace => e.backtrace
      )
    end
  end

  # BUG FIX: this log call previously referenced `@host`, an instance variable
  # that is never assigned (the config option is `hosts`, stored in `@hosts`),
  # so the hosts field in this log line was always nil.
  @logger.info("New Elasticsearch output", :cluster => @cluster,
               :hosts => @hosts, :port => @port, :protocol => @protocol)

  buffer_initialize(
    :max_items => @flush_size,
    :max_interval => @idle_flush_time,
    :logger => @logger
  )

  # Periodically wakes the retry worker so queued retryable events are
  # re-submitted even when the queue never fills up.
  @retry_timer_thread = Thread.new do
    loop do
      sleep(@retry_max_interval)
      @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
    end
  end

  # Retry worker: blocks until signalled (by the timer, by a full queue, or
  # during teardown), then drains the retry queue.
  @retry_thread = Thread.new do
    while @retry_teardown_requested.false?
      @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.wait(@retry_flush_mutex) }
      retry_flush
    end
  end
end # def register
|
387
|
+
|
388
|
+
|
389
|
+
public
|
390
|
+
# Resolves and loads the index template JSON to install into Elasticsearch.
# Falls back to the bundled elasticsearch-template.json that ships next to
# this file when no `template` path was configured.
#
# @return [Hash] the parsed template body
# @raise [RuntimeError] if the bundled default template cannot be found
def get_template
  if @template.nil?
    @template = ::File.expand_path('elasticsearch_java/elasticsearch-template.json', ::File.dirname(__FILE__))
    # FIX: File.exists? has been deprecated for years and is removed in
    # Ruby 3.2 — File.exist? is the supported spelling.
    if !::File.exist?(@template)
      raise "You must specify 'template => ...' in your elasticsearch output (I looked for '#{@template}')"
    end
  end
  # Strip newlines so the template is logged/sent as a single-line JSON blob.
  template_json = IO.read(@template).gsub(/\n/,'')
  template = LogStash::Json.load(template_json)
  @logger.info("Using mapping template", :template => template)
  return template
end # def get_template
|
402
|
+
|
403
|
+
public
|
404
|
+
# Queues one event for bulk submission, applying back-pressure while the
# retry queue is over capacity. Builds the per-document bulk parameters
# (_id/_index/_type/_routing and optional _upsert) from the event.
def receive(event)
  return unless output?(event)

  # Block until the retry queue drains below its cap. This back-pressure
  # slows the receive rate instead of letting the retry queue grow unbounded.
  @retry_flush_mutex.synchronize do
    while @retry_queue.size > @retry_max_items
      @retry_queue_not_full.wait(@retry_flush_mutex)
    end
  end

  event['@metadata']['retry_count'] = 0

  # Resolve the document 'type': explicit document_type wins, then the
  # deprecated index_type, then the event's own type, defaulting to "logs".
  type =
    if @document_type
      event.sprintf(@document_type)
    elsif @index_type # deprecated
      event.sprintf(@index_type)
    else
      event["type"] || "logs"
    end

  params = {
    :_id      => @document_id ? event.sprintf(@document_id) : nil,
    :_index   => event.sprintf(@index),
    :_type    => type,
    :_routing => @routing ? event.sprintf(@routing) : nil
  }

  if @action == 'update' && @upsert != ""
    params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert))
  end

  buffer_receive([event.sprintf(@action), params, event])
end # def receive
|
436
|
+
|
437
|
+
public
|
438
|
+
# The submit method can be called from both the
|
439
|
+
# Stud::Buffer flush thread and from our own retry thread.
|
440
|
+
# Sends a batch of [action, params, event] triples to ES as one bulk request.
# Retryable failures (see RETRYABLE_CODES) are pushed onto the retry queue;
# other non-success statuses are logged and dropped.
# May be invoked from both the Stud::Buffer flush thread and the retry thread,
# hence the mutex around the bulk call.
def submit(actions)
  es_actions = actions.map { |action, params, event| [action, params, event.to_hash] }

  bulk_response = @submit_mutex.synchronize { client.bulk(es_actions) }

  return unless bulk_response["errors"]

  to_retry = []
  actions.zip(bulk_response['statuses']).each do |action, resp_code|
    if RETRYABLE_CODES.include?(resp_code)
      @logger.warn "retrying failed action with response code: #{resp_code}"
      to_retry << action
    elsif !SUCCESS_CODES.include?(resp_code)
      @logger.warn "failed action with response of #{resp_code}, dropping action: #{action}"
    end
  end
  retry_push(to_retry) unless to_retry.empty?
end
|
462
|
+
|
463
|
+
# When there are exceptions raised upon submission, we raise an exception so that
|
464
|
+
# Stud::Buffer will retry to flush
|
465
|
+
public
|
466
|
+
# Stud::Buffer flush callback. Any exception raised during submission is
# logged and re-raised so that Stud::Buffer retries the flush.
def flush(actions, teardown = false)
  submit(actions)
rescue => e
  @logger.error "Got error to send bulk of actions: #{e.message}"
  raise e
end # def flush
|
474
|
+
|
475
|
+
public
|
476
|
+
# Shuts the output down: stops the retry timer and retry worker threads in an
# order that guarantees no retry signal is lost, then performs one final
# buffer flush and retry pass.
def teardown
  if @cacert # remove temporary jks store created from the cacert
    # NOTE(review): assumes @truststore was set to a temporary JKS path when
    # @cacert was configured — confirm in the SSL setup code (not visible here).
    File.delete(@truststore)
  end

  @retry_teardown_requested.make_true
  # First, make sure retry_timer_thread is stopped
  # to ensure we do not signal a retry based on
  # the retry interval.
  Thread.kill(@retry_timer_thread)
  @retry_timer_thread.join
  # Signal flushing in the case that #retry_flush is in
  # the process of waiting for a signal.
  @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
  # Now, #retry_flush is ensured to not be in a state of
  # waiting and can be safely joined into the main thread
  # for further final execution of an in-process remaining call.
  @retry_thread.join

  # execute any final actions along with a proceeding retry for any
  # final actions that did not succeed.
  buffer_flush(:final => true)
  retry_flush
end
|
500
|
+
|
501
|
+
private
|
502
|
+
# in charge of submitting any actions in @retry_queue that need to be
|
503
|
+
# retried
|
504
|
+
#
|
505
|
+
# This method is not called concurrently. It is only called by @retry_thread
|
506
|
+
# and once that thread is ended during the teardown process, a final call
|
507
|
+
# to this method is done upon teardown in the main thread.
|
508
|
+
# Drains the retry queue, incrementing each event's retry count and dropping
# events that exceeded @max_retries; the survivors are re-submitted in one
# batch. Finally wakes any #receive callers blocked on a full retry queue.
#
# Not called concurrently: only @retry_thread runs it, plus one final call
# from the main thread during teardown after that thread has been joined.
def retry_flush
  unless @retry_queue.empty?
    batch = @retry_queue.size.times.map do
      action, params, event = @retry_queue.pop
      event['@metadata']['retry_count'] += 1

      if event['@metadata']['retry_count'] > @max_retries
        @logger.error "too many attempts at sending event. dropping: #{event}"
        nil
      else
        [action, params, event]
      end
    end.compact

    submit(batch) unless batch.empty?
  end

  @retry_flush_mutex.synchronize do
    @retry_queue_not_full.signal if @retry_queue.size < @retry_max_items
  end
end
|
529
|
+
|
530
|
+
private
|
531
|
+
# Appends the given action(s) to the retry queue and wakes the retry worker
# once the queue has reached its configured capacity.
def retry_push(actions)
  Array(actions).each { |action| @retry_queue << action }
  @retry_flush_mutex.synchronize do
    @retry_queue_needs_flushing.signal if @retry_queue.size >= @retry_max_items
  end
end
|
537
|
+
|
538
|
+
# Discover companion gems (logstash-output-elasticsearch_java-*) so they can
# contribute client settings in #register and have their code required below.
# NOTE(review): @@class variables are shared across the inheritance tree; a
# class-instance variable would be safer, but #register also reads @@plugins.
@@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-elasticsearch_java-/ }

@@plugins.each do |plugin|
  name = plugin.name.split('-')[-1]
  require "logstash/outputs/elasticsearch_java/#{name}"
end
|
544
|
+
|
545
|
+
end # class LogStash::Outputs::ElasticSearchJava
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Gem specification for the logstash-output-elasticsearch_java plugin.
Gem::Specification.new do |s|
  s.name = 'logstash-output-elasticsearch_java'
  s.version = '1.0.0.beta1'
  # FIX: use the canonical SPDX identifier. RubyGems validates licenses
  # against the SPDX list and warns on the lowercase 'apache-2.0' form.
  s.licenses = ['Apache-2.0']
  s.summary = "Logstash Output to Elasticsearch using Java node/transport client"
  s.description = "Output events to elasticsearch using the java client"
  s.authors = ["Elastic"]
  s.email = 'info@elastic.co'
  s.homepage = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  s.files = `git ls-files`.split($\)

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency 'concurrent-ruby'
  s.add_runtime_dependency 'elasticsearch', ['>= 1.0.10', '~> 1.0']
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
  s.add_runtime_dependency 'cabin', ['~> 0.6']
  s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0'

  s.add_development_dependency 'ftw', '~> 0.0.42'
  s.add_development_dependency 'logstash-input-generator'
  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'longshoreman'
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "logstash/devutils/rspec/spec_helper"
|
2
|
+
require "ftw"
|
3
|
+
require "logstash/plugin"
|
4
|
+
require "logstash/json"
|
5
|
+
require "stud/try"
|
6
|
+
require "longshoreman"
|
7
|
+
require "logstash/outputs/elasticsearch_java"
|
8
|
+
require "logstash/outputs/elasticsearch_java/protocol"
|
9
|
+
|
10
|
+
# Name/image/tag of the throwaway Elasticsearch Docker container used by the
# integration specs; the random suffix avoids name collisions between runs.
CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999)}"
CONTAINER_IMAGE = "elasticsearch"
CONTAINER_TAG = "1.6"

# When this env var is set, specs talk to ES inside a Docker container
# (located via Longshoreman) instead of a locally running instance.
DOCKER_INTEGRATION = ENV["DOCKER_INTEGRATION"]
|
15
|
+
|
16
|
+
# Helpers shared by the ES output specs for locating the Elasticsearch
# instance under test (local process, or Docker container via Longshoreman).
module ESHelper
  # Loopback address for a locally running ES.
  def get_local_host
    "127.0.0.1"
  end

  # Docker host IP when running the dockerized integration setup, else loopback.
  def get_host
    DOCKER_INTEGRATION ? Longshoreman.new.get_host_ip : "127.0.0.1"
  end

  # Resolves the ES port for the given protocol ('http' => 9200, else 9300),
  # asking the Docker container for its mapped transport port when applicable.
  def get_port(protocol)
    return (protocol.to_sym == :http ? 9200 : 9300) unless DOCKER_INTEGRATION

    container = Longshoreman::Container.new
    container.get(CONTAINER_NAME)
    container.rport(9300)
  end

  # HTTP client pointed at the ES instance under test.
  def get_client
    Elasticsearch::Client.new(:hosts => "#{get_host}:#{get_port('http')}")
  end
end
|
39
|
+
|
40
|
+
|
41
|
+
# Spec-suite wiring: mixes the ES helpers into all examples and, when running
# the dockerized integration setup, manages the lifecycle of the throwaway
# Elasticsearch container.
RSpec.configure do |config|
  config.include ESHelper


  if DOCKER_INTEGRATION
    # this :all hook gets run before every describe block that is tagged with :integration => true.
    config.before(:all, :integration => true) do


      # check if container exists already before creating new one.
      begin
        ls = Longshoreman::new
        ls.container.get(CONTAINER_NAME)
      rescue Docker::Error::NotFoundError
        Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME)
        # TODO(talevy): verify ES is running instead of static timeout
        sleep 10
      end
    end

    # we want to do a final cleanup after all :integration runs,
    # but we don't want to clean up before the last block.
    # This is a final blind check to see if the ES docker container is running and
    # needs to be cleaned up. If no container can be found and/or docker is not
    # running on the system, we do nothing.
    config.after(:suite) do
      # only cleanup docker container if system has docker and the container is running
      begin
        ls = Longshoreman::new
        ls.container.get(CONTAINER_NAME)
        ls.cleanup
      rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
        # do nothing
      end
    end
  end

  # Reset NodeClient state between examples — clear_client presumably drops a
  # cached/shared client; confirm against protocol.rb (not visible here).
  config.after(:each) do
    LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient.clear_client()
  end
end
|