logstash-output-elasticsearch_java 1.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +31 -0
- data/Gemfile +3 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +98 -0
- data/Rakefile +1 -0
- data/lib/logstash/outputs/elasticsearch_java/elasticsearch-template.json +41 -0
- data/lib/logstash/outputs/elasticsearch_java/protocol.rb +258 -0
- data/lib/logstash/outputs/elasticsearch_java.rb +545 -0
- data/lib/logstash-output-elasticsearch_java_jars.rb +5 -0
- data/logstash-output-elasticsearch_java.gemspec +32 -0
- data/spec/es_spec_helper.rb +81 -0
- data/spec/integration/outputs/elasticsearch/node_spec.rb +36 -0
- data/spec/integration/outputs/index_spec.rb +90 -0
- data/spec/integration/outputs/retry_spec.rb +148 -0
- data/spec/integration/outputs/routing_spec.rb +60 -0
- data/spec/integration/outputs/secure_spec.rb +113 -0
- data/spec/integration/outputs/templates_spec.rb +97 -0
- data/spec/integration/outputs/transport_create_spec.rb +94 -0
- data/spec/integration/outputs/update_spec.rb +88 -0
- data/spec/unit/outputs/elasticsearch/protocol_spec.rb +32 -0
- data/spec/unit/outputs/elasticsearch_spec.rb +79 -0
- data/vendor/jar-dependencies/runtime-jars/antlr-runtime-3.5.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/asm-4.1.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/asm-commons-4.1.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/elasticsearch-1.7.0.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-analyzers-common-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-core-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-grouping-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-highlighter-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-join-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-memory-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-misc-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-queries-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-queryparser-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-sandbox-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-spatial-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lucene-suggest-4.10.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/spatial4j-0.4.1.jar +0 -0
- metadata +241 -0
@@ -0,0 +1,545 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/namespace"
|
3
|
+
require "logstash/environment"
|
4
|
+
require "logstash/outputs/base"
|
5
|
+
require "logstash/json"
|
6
|
+
require "concurrent"
|
7
|
+
require "stud/buffer"
|
8
|
+
require "socket" # for Socket.gethostname
|
9
|
+
require "thread" # for safe queueing
|
10
|
+
require "uri" # for escaping user input
|
11
|
+
require "logstash/outputs/elasticsearch_java/protocol"
|
12
|
+
|
13
|
+
# This output lets you store logs in Elasticsearch using the native 'node' and 'transport'
|
14
|
+
# protocols. It is highly recommended to use the regular 'logstash-output-elasticsearch' output
|
15
|
+
# which uses HTTP instead. This output is, in-fact, sometimes slower, and never faster than that one.
|
16
|
+
# Additionally, upgrading your Elasticsearch cluster may require you to simultaneously update this
|
17
|
+
# plugin for any protocol level changes. The HTTP client may be easier to work with due to wider
|
18
|
+
# familiarity with HTTP.
|
19
|
+
#
|
20
|
+
# *VERSION NOTE*: Your Elasticsearch cluster must be running Elasticsearch 1.0.0 or later.
|
21
|
+
#
|
22
|
+
# If you want to set other Elasticsearch options that are not exposed directly
|
23
|
+
# as configuration options, there are two methods:
|
24
|
+
#
|
25
|
+
# * Create an `elasticsearch.yml` file in the $PWD of the Logstash process
|
26
|
+
# * Pass in es.* java properties (`java -Des.node.foo=` or `ruby -J-Des.node.foo=`)
|
27
|
+
#
|
28
|
+
# With the `protocol` setting "node", this plugin will join your
|
29
|
+
# Elasticsearch cluster as a client node, so it will show up in Elasticsearch's
|
30
|
+
# cluster status.
|
31
|
+
#
|
32
|
+
# You can learn more about Elasticsearch at <https://www.elastic.co/products/elasticsearch>
|
33
|
+
#
|
34
|
+
# ==== Operational Notes
|
35
|
+
#
|
36
|
+
# If using the `node` protocol, your firewalls might need
|
37
|
+
# to permit port 9300 in *both* directions (from Logstash to Elasticsearch, and
|
38
|
+
# Elasticsearch to Logstash)
|
39
|
+
#
|
40
|
+
# ==== Retry Policy
|
41
|
+
#
|
42
|
+
# By default all bulk requests to ES are synchronous. Not all events in the bulk requests
|
43
|
+
# always make it successfully. For example, there could be events which are not formatted
|
44
|
+
# correctly for the index they are targeting (type mismatch in mapping). So that we minimize loss of
|
45
|
+
# events, we have a specific retry policy in place. We retry all events which fail to be reached by
|
46
|
+
# Elasticsearch for network related issues. We retry specific events which exhibit errors under a separate
|
47
|
+
# policy described below. Events of this nature are ones which experience ES error codes described as
|
48
|
+
# retryable errors.
|
49
|
+
#
|
50
|
+
# *Retryable Errors:*
|
51
|
+
#
|
52
|
+
# - 409, Conflict
# - 429, Too Many Requests (RFC6585)
|
53
|
+
# - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
|
54
|
+
#
|
55
|
+
# Here are the rules of what is retried when:
|
56
|
+
#
|
57
|
+
# - Block and retry all events in bulk response that experiences transient network exceptions until
|
58
|
+
# a successful submission is received by Elasticsearch.
|
59
|
+
# - Retry subset of sent events which resulted in ES errors of a retryable nature which can be found
|
60
|
+
# in RETRYABLE_CODES
|
61
|
+
# - For events which returned retryable error codes, they will be pushed onto a separate queue for
|
62
|
+
# retrying events. Events in this queue will be retried a maximum of 3 times by default (configurable through :max_retries). The size of
|
63
|
+
# this queue is capped by the value set in :retry_max_items.
|
64
|
+
# - Events from the retry queue are submitted again either when the queue reaches its max size or when
|
65
|
+
# the max interval time is reached, which is set in :retry_max_interval.
|
66
|
+
# - Events which are not retryable or have reached their max retry count are logged to stderr.
|
67
|
+
class LogStash::Outputs::ElasticSearchJava < LogStash::Outputs::Base
|
68
|
+
attr_reader :client
|
69
|
+
|
70
|
+
include Stud::Buffer
|
71
|
+
RETRYABLE_CODES = [409, 429, 503]
|
72
|
+
SUCCESS_CODES = [200, 201]
|
73
|
+
|
74
|
+
config_name "elasticsearch_java"
|
75
|
+
|
76
|
+
# The index to write events to. This can be dynamic using the `%{foo}` syntax.
|
77
|
+
# The default value will partition your indices by day so you can more easily
|
78
|
+
# delete old data or only search specific date ranges.
|
79
|
+
# Indexes may not contain uppercase characters.
|
80
|
+
# For weekly indexes ISO 8601 format is recommended, eg. logstash-%{+xxxx.ww}
|
81
|
+
config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
|
82
|
+
|
83
|
+
# The index type to write events to. Generally you should try to write only
|
84
|
+
# similar events to the same 'type'. String expansion `%{foo}` works here.
|
85
|
+
#
|
86
|
+
# Deprecated in favor of `document_type` field.
|
87
|
+
config :index_type, :validate => :string, :deprecated => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
|
88
|
+
|
89
|
+
# The document type to write events to. Generally you should try to write only
|
90
|
+
# similar events to the same 'type'. String expansion `%{foo}` works here.
|
91
|
+
# Unless you set 'document_type', the event 'type' will be used if it exists
|
92
|
+
# otherwise the document type will be assigned the value of 'logs'
|
93
|
+
config :document_type, :validate => :string
|
94
|
+
|
95
|
+
# Starting in Logstash 1.3 (unless you set option `manage_template` to false)
|
96
|
+
# a default mapping template for Elasticsearch will be applied, if you do not
|
97
|
+
# already have one set to match the index pattern defined (default of
|
98
|
+
# `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
|
99
|
+
# the template will be applied to all indices starting with `logstash-*`
|
100
|
+
#
|
101
|
+
# If you have dynamic templating (e.g. creating indices based on field names)
|
102
|
+
# then you should set `manage_template` to false and use the REST API to upload
|
103
|
+
# your templates manually.
|
104
|
+
config :manage_template, :validate => :boolean, :default => true
|
105
|
+
|
106
|
+
# This configuration option defines how the template is named inside Elasticsearch.
|
107
|
+
# Note that if you have used the template management features and subsequently
|
108
|
+
# change this, you will need to prune the old template manually, e.g.
|
109
|
+
#
|
110
|
+
# `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
|
111
|
+
#
|
112
|
+
# where `OldTemplateName` is whatever the former setting was.
|
113
|
+
config :template_name, :validate => :string, :default => "logstash"
|
114
|
+
|
115
|
+
# You can set the path to your own template here, if you so desire.
|
116
|
+
# If not set, the included template will be used.
|
117
|
+
config :template, :validate => :path
|
118
|
+
|
119
|
+
# Overwrite the current template with whatever is configured
|
120
|
+
# in the `template` and `template_name` directives.
|
121
|
+
config :template_overwrite, :validate => :boolean, :default => false
|
122
|
+
|
123
|
+
# The document ID for the index. Useful for overwriting existing entries in
|
124
|
+
# Elasticsearch with the same ID.
|
125
|
+
config :document_id, :validate => :string
|
126
|
+
|
127
|
+
# A routing override to be applied to all processed events.
|
128
|
+
# This can be dynamic using the `%{foo}` syntax.
|
129
|
+
config :routing, :validate => :string
|
130
|
+
|
131
|
+
# The name of your cluster if you set it on the Elasticsearch side. Useful
|
132
|
+
# for discovery when using `node` or `transport` protocols.
|
133
|
+
# By default, it looks for a cluster named 'elasticsearch'.
|
134
|
+
# Equivalent to the Elasticsearch option 'cluster.name'
|
135
|
+
config :cluster, :validate => :string
|
136
|
+
|
137
|
+
# For the `node` protocol, if you do not specify `host`, it will attempt to use
|
138
|
+
# multicast discovery to connect to Elasticsearch. If http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[multicast is disabled] in Elasticsearch,
|
139
|
+
# you must include the hostname or IP address of the host(s) to use for Elasticsearch unicast discovery.
|
140
|
+
# Remember the `node` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (eg. 9300, not 9200).
|
141
|
+
# `"127.0.0.1"`
|
142
|
+
# `["127.0.0.1:9300","127.0.0.2:9300"]`
|
143
|
+
# When setting hosts for `node` protocol, it is important to confirm that at least one non-client
|
144
|
+
# node is listed in the `host` list. Also keep in mind that the `host` parameter when used with
|
145
|
+
# the `node` protocol is for *discovery purposes only* (not for load balancing). When multiple hosts
|
146
|
+
# are specified, it will contact the first host to see if it can use it to discover the cluster. If not,
|
147
|
+
# then it will contact the second host in the list and so forth. With the `node` protocol,
|
148
|
+
# Logstash will join the Elasticsearch cluster as a node client (which has a copy of the cluster
|
149
|
+
# state) and this node client is the one that will automatically handle the load balancing of requests
|
150
|
+
# across data nodes in the cluster.
|
151
|
+
# If you are looking for a high availability setup, our recommendation is to use the `transport` protocol (below),
|
152
|
+
# set up multiple http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[client nodes] and list the client nodes in the `host` parameter.
|
153
|
+
#
|
154
|
+
# For the `transport` protocol, it will load balance requests across the hosts specified in the `host` parameter.
|
155
|
+
# Remember the `transport` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#modules-transport[transport] address (eg. 9300, not 9200).
|
156
|
+
# `"127.0.0.1"`
|
157
|
+
# `["127.0.0.1:9300","127.0.0.2:9300"]`
|
158
|
+
# There is also a `sniffing` option (see below) that can be used with the transport protocol to instruct it to use the host to sniff for
|
159
|
+
# "alive" nodes in the cluster and automatically use it as the hosts list (but will skip the dedicated master nodes).
|
160
|
+
# If you do not use the sniffing option, it is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `host` list
|
161
|
+
# to prevent Logstash from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes.
|
162
|
+
#
|
163
|
+
# For the `http` protocol, it will load balance requests across the hosts specified in the `host` parameter.
|
164
|
+
# Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
|
165
|
+
# `"127.0.0.1"`
|
166
|
+
# `["127.0.0.1:9200","127.0.0.2:9200"]`
|
167
|
+
# It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `host` list
|
168
|
+
# to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes.
|
169
|
+
config :hosts, :validate => :array, :default => ["127.0.0.1"]
|
170
|
+
|
171
|
+
# The port for Elasticsearch transport to use.
|
172
|
+
#
|
173
|
+
# If you do not set this, the following defaults are used:
|
174
|
+
# * `protocol => transport` - port 9300-9305
|
175
|
+
# * `protocol => node` - port 9300-9305
|
176
|
+
config :port, :validate => :string, :default => "9300-9305"
|
177
|
+
|
178
|
+
# The name/address of the host to bind to for Elasticsearch clustering. Equivalent to the Elasticsearch 'network.host'
|
179
|
+
# option.
|
180
|
+
# This MUST be set for either protocol to work (node or transport)! The internal Elasticsearch node
|
181
|
+
# will bind to this ip. This ip MUST be reachable by all nodes in the Elasticsearch cluster
|
182
|
+
config :network_host, :validate => :string
|
183
|
+
|
184
|
+
# This sets the local port to bind to. Equivalent to the Elasticsearch option 'transport.tcp.port'
|
185
|
+
config :transport_tcp_port, :validate => :number
|
186
|
+
|
187
|
+
# This setting no longer does anything. It exists to keep config validation
|
188
|
+
# from failing. It will be removed in future versions.
|
189
|
+
config :max_inflight_requests, :validate => :number, :default => 50, :deprecated => true
|
190
|
+
|
191
|
+
# The node name Elasticsearch will use when joining a cluster.
|
192
|
+
#
|
193
|
+
# By default, this is generated internally by the ES client.
|
194
|
+
config :node_name, :validate => :string
|
195
|
+
|
196
|
+
# This plugin uses the bulk index api for improved indexing performance.
|
197
|
+
# To make efficient bulk api calls, we will buffer a certain number of
|
198
|
+
# events before flushing that out to Elasticsearch. This setting
|
199
|
+
# controls how many events will be buffered before sending a batch
|
200
|
+
# of events.
|
201
|
+
config :flush_size, :validate => :number, :default => 500
|
202
|
+
|
203
|
+
# The amount of time since last flush before a flush is forced.
|
204
|
+
#
|
205
|
+
# This setting helps ensure slow event rates don't get stuck in Logstash.
|
206
|
+
# For example, if your `flush_size` is 100, and you have received 10 events,
|
207
|
+
# and it has been more than `idle_flush_time` seconds since the last flush,
|
208
|
+
# Logstash will flush those 10 events automatically.
|
209
|
+
#
|
210
|
+
# This helps keep both fast and slow log streams moving along in
|
211
|
+
# near-real-time.
|
212
|
+
config :idle_flush_time, :validate => :number, :default => 1
|
213
|
+
|
214
|
+
# Choose the protocol used to talk to Elasticsearch.
|
215
|
+
#
|
216
|
+
# The 'node' protocol will connect to the cluster as a normal Elasticsearch
|
217
|
+
# node (but will not store data). If you use the `node` protocol, you must permit
|
218
|
+
# bidirectional communication on the port 9300 (or whichever port you have
|
219
|
+
# configured).
|
220
|
+
#
|
221
|
+
# If you do not specify the `host` parameter, it will use multicast for http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html[Elasticsearch discovery]. While this may work in a test/dev environment where multicast is enabled in
|
222
|
+
# Elasticsearch, we strongly recommend http://www.elastic.co/guide/en/elasticsearch/guide/current/_important_configuration_changes.html#_prefer_unicast_over_multicast[disabling multicast]
|
223
|
+
# in Elasticsearch. To connect to an Elasticsearch cluster with multicast disabled,
|
224
|
+
# you must include the `host` parameter (see relevant section above).
|
225
|
+
#
|
226
|
+
# The 'transport' protocol will connect to the host you specify and will
|
227
|
+
# not show up as a 'node' in the Elasticsearch cluster. This is useful
|
228
|
+
# in situations where you cannot permit connections outbound from the
|
229
|
+
# Elasticsearch cluster to this Logstash server.
|
230
|
+
#
|
231
|
+
# All protocols will use bulk requests when talking to Elasticsearch.
|
232
|
+
config :protocol, :validate => [ "node", "transport"], :default => "transport"
|
233
|
+
|
234
|
+
# The Elasticsearch action to perform. Valid actions are: `index`, `delete`, `create`, `update`, `create_unless_exists`.
|
235
|
+
#
|
236
|
+
# Use of this setting *REQUIRES* you also configure the `document_id` setting
|
237
|
+
# because `delete` actions all require a document id.
|
238
|
+
#
|
239
|
+
# What does each action do?
|
240
|
+
#
|
241
|
+
# - index: indexes a document (an event from Logstash).
|
242
|
+
# - delete: deletes a document by id
|
243
|
+
# - create: indexes a document, fails if a document by that id already exists in the index.
|
244
|
+
# - update: updates a document by id
|
245
|
+
# following action is not supported by HTTP protocol
|
246
|
+
# - create_unless_exists: creates a document, fails if no id is provided
|
247
|
+
#
|
248
|
+
# For more details on actions, check out the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
|
249
|
+
config :action, :validate => :string, :default => "index"
|
250
|
+
|
251
|
+
# Validate the server's certificate
|
252
|
+
# Disabling this severely compromises security
|
253
|
+
# For more information read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
|
254
|
+
config :ssl_certificate_verification, :validate => :boolean, :default => true
|
255
|
+
|
256
|
+
# The .cer or .pem file to validate the server's certificate
|
257
|
+
config :cacert, :validate => :path
|
258
|
+
|
259
|
+
# The JKS truststore to validate the server's certificate
|
260
|
+
# Use either `:truststore` or `:cacert`
|
261
|
+
config :truststore, :validate => :path
|
262
|
+
|
263
|
+
# Set the truststore password
|
264
|
+
config :truststore_password, :validate => :password
|
265
|
+
|
266
|
+
# The keystore used to present a certificate to the server
|
267
|
+
# It can be either .jks or .p12
|
268
|
+
config :keystore, :validate => :path
|
269
|
+
|
270
|
+
# Set the keystore password
|
271
|
+
config :keystore_password, :validate => :password
|
272
|
+
|
273
|
+
# Enable cluster sniffing (transport only).
|
274
|
+
# Asks host for the list of all cluster nodes and adds them to the hosts list
|
275
|
+
# Equivalent to the Elasticsearch option 'client.transport.sniff'
|
276
|
+
config :sniffing, :validate => :boolean, :default => false
|
277
|
+
|
278
|
+
# Set max retry for each event
|
279
|
+
config :max_retries, :validate => :number, :default => 3
|
280
|
+
|
281
|
+
# Set retry policy for events that failed to send
|
282
|
+
config :retry_max_items, :validate => :number, :default => 5000
|
283
|
+
|
284
|
+
# Set max interval between bulk retries
|
285
|
+
config :retry_max_interval, :validate => :number, :default => 5
|
286
|
+
|
287
|
+
# Enable doc_as_upsert for update mode
|
288
|
+
# create a new document with source if document_id doesn't exist
|
289
|
+
config :doc_as_upsert, :validate => :boolean, :default => false
|
290
|
+
|
291
|
+
# Set upsert content for update mode
|
292
|
+
# create a new document with this parameter as json string if document_id doesn't exist
|
293
|
+
config :upsert, :validate => :string, :default => ""
|
294
|
+
|
295
|
+
public
|
296
|
+
# Prepares the output for use. In order, this method:
#
# 1. creates the synchronization primitives used by #submit and the retry queue,
# 2. builds the Elasticsearch client settings from the plugin configuration
#    (plus any settings contributed by installed extension gems in @@plugins),
# 3. instantiates the protocol-specific client and stores it in @client,
# 4. installs the index template when `manage_template` is enabled
#    (a failure here is logged, not raised),
# 5. initializes the Stud::Buffer and starts the two retry threads.
def register
  @submit_mutex = Mutex.new
  # retry-specific variables
  @retry_flush_mutex = Mutex.new
  @retry_teardown_requested = Concurrent::AtomicBoolean.new(false)
  # signalled when the retry queue should be flushed: periodically by the
  # timer thread started below, and by #retry_push once the queue is full
  @retry_queue_needs_flushing = ConditionVariable.new
  # signalled by #retry_flush once the retry queue has room again (see #receive)
  @retry_queue_not_full = ConditionVariable.new
  @retry_queue = Queue.new

  client_settings = {}
  client_settings["cluster.name"] = @cluster if @cluster
  client_settings["network.host"] = @network_host if @network_host
  client_settings["transport.tcp.port"] = @transport_tcp_port if @transport_tcp_port
  client_settings["client.transport.sniff"] = @sniffing

  # Fall back to a name built from host name, process id and object id
  # when no explicit node name is configured.
  client_settings["node.name"] = @node_name || "logstash-#{Socket.gethostname}-#{$$}-#{object_id}"

  # Let every discovered extension gem contribute its own client settings.
  @@plugins.each do |plugin|
    name = plugin.name.split('-')[-1]
    client_settings.merge!(LogStash::Outputs::ElasticSearchJava.const_get(name.capitalize).create_client_config(self))
  end

  if (@hosts.nil? || @hosts.empty?) && @protocol != "node" # node can use zen discovery
    @logger.info("No 'hosts' set in elasticsearch output. Defaulting to localhost")
    @hosts = ["localhost"]
  end

  common_options = {
    :protocol => @protocol,
    :client_settings => client_settings,
    :hosts => @hosts,
    :port => @port
  }

  # Update API setup: only passed to the client when the action is 'update'
  update_options = {
    :upsert => @upsert,
    :doc_as_upsert => @doc_as_upsert
  }
  common_options.merge! update_options if @action == 'update'

  client_class = case @protocol
    when "transport"
      LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::TransportClient
    when "node"
      LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient
  end

  @client = client_class.new(common_options)

  if @manage_template
    begin
      @logger.info("Automatic template management enabled", :manage_template => @manage_template.to_s)
      client.template_install(@template_name, get_template, @template_overwrite)
    rescue => e
      # Template installation is best-effort: report the failure and carry on.
      @logger.error("Failed to install template",
        :message => e.message,
        :error_class => e.class.name,
        :backtrace => e.backtrace
      )
    end
  end

  # The configured option is `hosts`; log @hosts (the previous @host was never set).
  @logger.info("New Elasticsearch output", :cluster => @cluster,
    :hosts => @hosts, :port => @port, :protocol => @protocol)

  buffer_initialize(
    :max_items => @flush_size,
    :max_interval => @idle_flush_time,
    :logger => @logger
  )

  # Timer thread: requests a retry-queue flush every @retry_max_interval seconds.
  @retry_timer_thread = Thread.new do
    loop do
      sleep(@retry_max_interval)
      @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
    end
  end

  # Retry thread: waits for a flush request, then resubmits the queued actions,
  # until teardown has been requested.
  @retry_thread = Thread.new do
    while @retry_teardown_requested.false?
      @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.wait(@retry_flush_mutex) }
      retry_flush
    end
  end
end # def register
|
387
|
+
|
388
|
+
|
389
|
+
public
|
390
|
+
# Loads and parses the index mapping template.
#
# When no `template` path is configured, @template is set to the
# `elasticsearch_java/elasticsearch-template.json` file located next to this
# source file.
#
# Returns the parsed template as produced by LogStash::Json.load.
# Raises RuntimeError when no template is configured and the default
# template file cannot be found.
def get_template
  if @template.nil?
    @template = ::File.expand_path('elasticsearch_java/elasticsearch-template.json', ::File.dirname(__FILE__))
    # File.exist? rather than the deprecated File.exists? (removed in Ruby 3.2)
    unless ::File.exist?(@template)
      raise "You must specify 'template => ...' in your elasticsearch output (I looked for '#{@template}')"
    end
  end
  # Newlines are stripped before the JSON text is parsed.
  template_json = IO.read(@template).gsub(/\n/,'')
  template = LogStash::Json.load(template_json)
  @logger.info("Using mapping template", :template => template)
  return template
end # def get_template
|
402
|
+
|
403
|
+
public
|
404
|
+
# Accepts one event from the pipeline and hands it to the Stud::Buffer as an
# [action, params, event] triple.
#
# Blocks while the retry queue holds more than @retry_max_items entries, which
# applies back-pressure on the sender. Events rejected by output? are ignored.
def receive(event)
  return unless output?(event)

  # Wait until the retry queue has drained below its limit.
  @retry_flush_mutex.synchronize do
    while @retry_queue.size > @retry_max_items
      @retry_queue_not_full.wait(@retry_flush_mutex)
    end
  end

  event['@metadata']['retry_count'] = 0

  # Document type: `document_type` wins over the deprecated `index_type`;
  # otherwise use the event's own "type" field, falling back to "logs".
  type_template = @document_type || @index_type
  type = type_template ? event.sprintf(type_template) : (event["type"] || "logs")

  document_id = @document_id ? event.sprintf(@document_id) : nil
  index_name = event.sprintf(@index)
  routing = @routing ? event.sprintf(@routing) : nil

  params = {
    :_id => document_id,
    :_index => index_name,
    :_type => type,
    :_routing => routing
  }

  if @action == 'update' && @upsert != ""
    params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert))
  end

  buffer_receive([event.sprintf(@action), params, event])
end # def receive
|
436
|
+
|
437
|
+
public
|
438
|
+
# Sends a batch of [action, params, event] triples to Elasticsearch in one
# bulk call and queues the retryable failures for another attempt.
#
# May be invoked from the Stud::Buffer flush thread as well as from the retry
# thread, so the bulk call itself is serialized through @submit_mutex.
def submit(actions)
  payload = actions.map { |verb, params, event| [verb, params, event.to_hash] }
  response = @submit_mutex.synchronize { client.bulk(payload) }
  return unless response["errors"]

  # Pair every action with its status code, in submission order.
  retryable = []
  actions.zip(response['statuses']).each do |action, status|
    if RETRYABLE_CODES.include?(status)
      @logger.warn "retrying failed action with response code: #{status}"
      retryable << action
    elsif !SUCCESS_CODES.include?(status)
      @logger.warn "failed action with response of #{status}, dropping action: #{action}"
    end
  end

  retry_push(retryable) unless retryable.empty?
end
|
462
|
+
|
463
|
+
# When there are exceptions raised upon submission, we raise an exception so that
|
464
|
+
# Stud::Buffer will retry to flush
|
465
|
+
public
|
466
|
+
# Flush callback for the buffered actions: submits them via #submit.
#
# Any error raised by #submit is logged and then re-raised to the caller.
# The `teardown` argument is accepted but not used here.
def flush(actions, teardown = false)
  submit(actions)
rescue => e
  @logger.error "Got error to send bulk of actions: #{e.message}"
  raise e
end # def flush
|
474
|
+
|
475
|
+
public
|
476
|
+
# Shuts the output down: removes the truststore file when `cacert` is in use,
# stops both retry threads, then flushes the remaining buffered events
# followed by one last pass over the retry queue.
def teardown
  # remove temporary jks store created from the cacert.
  # Guarded so that an unset or already-removed truststore cannot raise and
  # abort the rest of the shutdown (stopping threads, final flushes).
  if @cacert && @truststore && ::File.exist?(@truststore)
    ::File.delete(@truststore)
  end

  @retry_teardown_requested.make_true
  # First, make sure retry_timer_thread is stopped
  # to ensure we do not signal a retry based on
  # the retry interval.
  Thread.kill(@retry_timer_thread)
  @retry_timer_thread.join
  # Signal flushing in the case that #retry_flush is in
  # the process of waiting for a signal.
  # NOTE(review): if the retry thread has already passed its teardown check
  # but has not yet started waiting, this signal may be missed and the join
  # below could block - confirm against the loop in #register.
  @retry_flush_mutex.synchronize { @retry_queue_needs_flushing.signal }
  # Now, #retry_flush is ensured to not be in a state of
  # waiting and can be safely joined into the main thread
  # for further final execution of an in-process remaining call.
  @retry_thread.join

  # execute any final actions along with a proceeding retry for any
  # final actions that did not succeed.
  buffer_flush(:final => true)
  retry_flush
end
|
500
|
+
|
501
|
+
private
|
502
|
+
# Resubmits the actions currently waiting in @retry_queue.
#
# Every queued event gets its retry counter incremented; events that have
# exceeded @max_retries are logged and dropped, the rest are sent again
# through #submit in a single batch. Afterwards, a sender blocked in #receive
# is woken if the queue is below @retry_max_items.
#
# Not meant to run concurrently: it is driven by @retry_thread, plus one
# final call from #teardown after that thread has been joined.
def retry_flush()
  unless @retry_queue.empty?
    pending = []
    @retry_queue.size.times do
      action, doc, event = @retry_queue.pop
      event['@metadata']['retry_count'] += 1

      if event['@metadata']['retry_count'] > @max_retries
        @logger.error "too many attempts at sending event. dropping: #{event}"
      else
        pending << [action, doc, event]
      end
    end

    submit(pending) unless pending.empty?
  end

  @retry_flush_mutex.synchronize do
    @retry_queue_not_full.signal if @retry_queue.size < @retry_max_items
  end
end
|
529
|
+
|
530
|
+
private
|
531
|
+
# Appends the given action(s) to @retry_queue and requests an immediate
# retry flush once the queue holds at least @retry_max_items entries.
def retry_push(actions)
  Array(actions).each { |entry| @retry_queue.push(entry) }

  @retry_flush_mutex.synchronize do
    @retry_queue_needs_flushing.signal if @retry_queue.size >= @retry_max_items
  end
end
|
537
|
+
|
538
|
+
# Extension gems: every installed gem whose name matches
# "logstash-output-elasticsearch_java-" is remembered in @@plugins (which
# #register reads) and its code is loaded, using the last dash-separated
# part of the gem name as the file name.
@@plugins = Gem::Specification.find_all do |spec|
  spec.name =~ /logstash-output-elasticsearch_java-/
end

@@plugins.each do |plugin|
  extension = plugin.name.split('-')[-1]
  require "logstash/outputs/elasticsearch_java/#{extension}"
end
|
544
|
+
|
545
|
+
end # class LogStash::Outputs::ElasticSearchJava
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Gem specification for the logstash-output-elasticsearch_java plugin.
Gem::Specification.new do |s|
  s.name = 'logstash-output-elasticsearch_java'
  s.version = '1.0.0.beta1'
  s.licenses = ['apache-2.0']
  s.summary = "Logstash Output to Elasticsearch using Java node/transport client"
  s.description = "Output events to elasticsearch using the java client"
  s.authors = ["Elastic"]
  s.email = 'info@elastic.co'
  s.homepage = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # Split on newlines explicitly: `$\` is nil by default, which makes
  # String#split fall back to whitespace and break paths containing spaces.
  s.files = `git ls-files`.split("\n")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency 'concurrent-ruby'
  s.add_runtime_dependency 'elasticsearch', ['>= 1.0.10', '~> 1.0']
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
  s.add_runtime_dependency 'cabin', ['~> 0.6']
  s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0'

  s.add_development_dependency 'ftw', '~> 0.0.42'
  s.add_development_dependency 'logstash-input-generator'
  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'longshoreman'
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "logstash/devutils/rspec/spec_helper"
|
2
|
+
require "ftw"
|
3
|
+
require "logstash/plugin"
|
4
|
+
require "logstash/json"
|
5
|
+
require "stud/try"
|
6
|
+
require "longshoreman"
|
7
|
+
require "logstash/outputs/elasticsearch_java"
|
8
|
+
require "logstash/outputs/elasticsearch_java/protocol"
|
9
|
+
|
10
|
+
# Name (with a random numeric suffix), image and tag of the Elasticsearch
# Docker container used by the integration specs.
CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999)}"
CONTAINER_IMAGE = "elasticsearch"
CONTAINER_TAG = "1.6"

# Taken from the DOCKER_INTEGRATION environment variable; when set, the specs
# run against a Docker container instead of 127.0.0.1.
DOCKER_INTEGRATION = ENV["DOCKER_INTEGRATION"]
|
15
|
+
|
16
|
+
# Helpers shared by the specs for locating the Elasticsearch instance under
# test, either on the local machine or inside the Docker container.
module ESHelper
  # Address of the local machine.
  def get_local_host
    "127.0.0.1"
  end

  # Address of the Elasticsearch host: the Docker host IP when
  # DOCKER_INTEGRATION is set, 127.0.0.1 otherwise.
  def get_host
    DOCKER_INTEGRATION ? Longshoreman.new.get_host_ip : "127.0.0.1"
  end

  # Port to use for the given protocol (String or Symbol): 9200 for `http`,
  # 9300 for anything else. Under Docker, the container's mapping of that
  # port is returned, so HTTP clients get the mapping for 9200 rather than
  # the transport port.
  def get_port(protocol)
    es_port = protocol.to_sym == :http ? 9200 : 9300
    return es_port unless DOCKER_INTEGRATION

    container = Longshoreman::Container.new
    container.get(CONTAINER_NAME)
    container.rport(es_port)
  end

  # Elasticsearch HTTP client pointed at the instance under test.
  def get_client
    Elasticsearch::Client.new(:hosts => "#{get_host}:#{get_port('http')}")
  end
end
|
39
|
+
|
40
|
+
|
41
|
+
# RSpec setup: mixes ESHelper into the examples and, when DOCKER_INTEGRATION
# is set, manages the lifecycle of the Elasticsearch Docker container.
RSpec.configure do |config|
  config.include ESHelper

  if DOCKER_INTEGRATION
    # this :all hook gets run before every describe block that is tagged with :integration => true.
    config.before(:all, :integration => true) do
      # Reuse the container if it already exists; otherwise start a new one.
      begin
        Longshoreman.new.container.get(CONTAINER_NAME)
      rescue Docker::Error::NotFoundError
        Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME)
        # TODO(talevy): verify ES is running instead of static timeout
        sleep 10
      end
    end

    # Final cleanup after all :integration runs (not after each block).
    # This is a blind check: if the container cannot be found and/or docker
    # is not running on the system, nothing is done.
    config.after(:suite) do
      begin
        docker = Longshoreman.new
        docker.container.get(CONTAINER_NAME)
        docker.cleanup
      rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
        # do nothing
      end
    end
  end

  # Calls NodeClient.clear_client after every example.
  config.after(:each) do
    LogStash::Outputs::ElasticSearchJavaPlugins::Protocols::NodeClient.clear_client()
  end
end
|