logstash-output-elasticsearch 0.1.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
data/lib/logstash/outputs/elasticsearch/buffer.rb
@@ -0,0 +1,124 @@
+require 'concurrent'
+java_import java.util.concurrent.locks.ReentrantLock
+
+module LogStash; module Outputs; class ElasticSearch
+  class Buffer
+    def initialize(logger, max_size, flush_interval, &block)
+      @logger = logger
+      # You need to acquire this for anything modifying state generally
+      @operations_mutex = Mutex.new
+      @operations_lock = java.util.concurrent.locks.ReentrantLock.new
+
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      @max_size = max_size
+      @submit_proc = block
+
+      @buffer = []
+
+      @last_flush = Time.now
+      @flush_interval = flush_interval
+      @flush_thread = spawn_interval_flusher
+    end
+
+    def push(item)
+      synchronize do |buffer|
+        push_unsafe(item)
+      end
+    end
+    alias_method :<<, :push
+
+    # Push multiple items onto the buffer in a single operation
+    def push_multi(items)
+      raise ArgumentError, "push_multi takes an Array, not a #{items.class}!" unless items.is_a?(Array)
+      synchronize do |buffer|
+        items.each {|item| push_unsafe(item) }
+      end
+    end
+
+    def flush
+      synchronize { flush_unsafe }
+    end
+
+    def stop(do_flush=true, wait_complete=true)
+      return if stopping?
+      @stopping.make_true
+
+      # No need to acquire a lock in this case
+      return if !do_flush && !wait_complete
+
+      synchronize do
+        flush_unsafe if do_flush
+        @flush_thread.join if wait_complete
+      end
+    end
+
+    def contents
+      synchronize {|buffer| buffer}
+    end
+
+    # For externally operating on the buffer contents:
+    # this takes a block, yields the internal buffer, and executes
+    # the block while holding the internal mutex
+    def synchronize
+      @operations_mutex.synchronize { yield(@buffer) }
+    end
+
+    # These methods are private for various reasons, chief among them thread safety!
+    # Many require the @operations_mutex to be locked to be safe
+    private
+
+    def push_unsafe(item)
+      @buffer << item
+      if @buffer.size >= @max_size
+        flush_unsafe
+      end
+    end
+
+    def spawn_interval_flusher
+      Thread.new do
+        loop do
+          sleep 0.2
+          break if stopping?
+          synchronize { interval_flush }
+        end
+      end
+    end
+
+    def interval_flush
+      if last_flush_seconds_ago >= @flush_interval
+        begin
+          @logger.debug? && @logger.debug("Flushing buffer at interval",
+                                          :instance => self.inspect,
+                                          :interval => @flush_interval)
+          flush_unsafe
+        rescue StandardError => e
+          @logger.warn("Error flushing buffer at interval!",
+                       :instance => self.inspect,
+                       :message => e.message,
+                       :class => e.class.name,
+                       :backtrace => e.backtrace
+          )
+        rescue Exception => e
+          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
+        end
+      end
+    end
+
+    def flush_unsafe
+      if @buffer.size > 0
+        @submit_proc.call(@buffer)
+        @buffer.clear
+      end
+
+      @last_flush = Time.now # This must always be set to ensure correct timer behavior
+    end
+
+    def last_flush_seconds_ago
+      Time.now - @last_flush
+    end
+
+    def stopping?
+      @stopping.true?
+    end
+  end
+end end end
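
For orientation, a minimal usage sketch of the new `Buffer` class (a hypothetical harness, not part of the gem; the plugin itself builds the buffer in `setup_buffer_and_handler` from its `flush_size` and `idle_flush_time` settings). It assumes JRuby, since the class calls `java_import`, and a stub logger providing just the methods `Buffer` touches:

    # Hypothetical harness exercising Buffer on JRuby.
    require "logstash/outputs/elasticsearch/buffer"

    # Stub with the logger methods Buffer calls (debug?, debug, warn).
    class StubLogger
      def debug?; false; end
      def debug(*); end
      def warn(message, details = {}); puts "WARN: #{message} #{details.inspect}"; end
    end

    batches = []
    # Flush every 3 items, or after 1 second of inactivity.
    buffer = LogStash::Outputs::ElasticSearch::Buffer.new(StubLogger.new, 3, 1) do |items|
      batches << items.dup # the internal array is cleared after this block returns
    end

    buffer << :a
    buffer.push_multi([:b, :c]) # reaches max_size, triggers a synchronous flush
    buffer.stop                 # final flush, then joins the interval flusher thread

    p batches # => [[:a, :b, :c]]

Note the `.dup`: `flush_unsafe` hands the block the internal buffer itself and clears it immediately afterwards, so the block must copy anything it wants to keep.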
data/lib/logstash/outputs/elasticsearch/common.rb
@@ -0,0 +1,205 @@
+require "logstash/outputs/elasticsearch/template_manager"
+require "logstash/outputs/elasticsearch/buffer"
+
+module LogStash; module Outputs; class ElasticSearch;
+  module Common
+    attr_reader :client, :hosts
+
+    RETRYABLE_CODES = [429, 503]
+    SUCCESS_CODES = [200, 201]
+
+    def register
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      setup_hosts # properly sets @hosts
+      build_client
+      install_template
+      setup_buffer_and_handler
+      check_action_validity
+
+      @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts)
+    end
+
+    def receive(event)
+      @buffer << event_action_tuple(event)
+    end
+
+    # Receive an array of events and immediately attempt to index them (no buffering)
+    def multi_receive(events)
+      events.each_slice(@flush_size) do |slice|
+        retrying_submit(slice.map {|e| event_action_tuple(e) })
+      end
+    end
+
+    # Convert the event into a 3-tuple of action, params, and event
+    def event_action_tuple(event)
+      params = event_action_params(event)
+      action = event.sprintf(@action)
+      [action, params, event]
+    end
+
+    def flush
+      @buffer.flush
+    end
+
+    def setup_hosts
+      @hosts = Array(@hosts)
+      if @hosts.empty?
+        @logger.info("No 'host' set in elasticsearch output. Defaulting to localhost")
+        @hosts.replace(["localhost"])
+      end
+    end
+
+    def install_template
+      TemplateManager.install_template(self)
+    end
+
+    def setup_buffer_and_handler
+      @buffer = ::LogStash::Outputs::ElasticSearch::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions|
+        retrying_submit(actions)
+      end
+    end
+
+    def check_action_validity
+      raise LogStash::ConfigurationError, "No action specified!" unless @action
+
+      # If we're using string interpolation, we're good!
+      return if @action =~ /%{.+}/
+      return if valid_actions.include?(@action)
+
+      raise LogStash::ConfigurationError, "Action '#{@action}' is invalid! Pick one of #{valid_actions} or use a sprintf style statement"
+    end
+
+    # To be overridden by the -java version
+    VALID_HTTP_ACTIONS = ["index", "delete", "create", "update"]
+    def valid_actions
+      VALID_HTTP_ACTIONS
+    end
+
+    def retrying_submit(actions)
+      # Initially we submit the full list of actions
+      submit_actions = actions
+
+      while submit_actions && submit_actions.length > 0
+        return if !submit_actions || submit_actions.empty? # If everything's a success we move along
+        # We retry with whatever didn't succeed
+        begin
+          submit_actions = submit(submit_actions)
+        rescue => e
+          @logger.warn("Encountered an unexpected error submitting a bulk request! Will retry.",
+                       :message => e.message,
+                       :class => e.class.name,
+                       :backtrace => e.backtrace)
+        end
+
+        sleep @retry_max_interval if submit_actions && submit_actions.length > 0
+      end
+    end
+
+    def submit(actions)
+      es_actions = actions.map { |a, doc, event| [a, doc, event.to_hash] }
+
+      bulk_response = safe_bulk(es_actions, actions)
+
+      # If there are no errors, we're done here!
+      return unless bulk_response["errors"]
+
+      actions_to_retry = []
+      bulk_response["items"].each_with_index do |response, idx|
+        action_type, action_props = response.first
+        status = action_props["status"]
+        error = action_props["error"]
+        action = actions[idx]
+
+        if SUCCESS_CODES.include?(status)
+          next
+        elsif RETRYABLE_CODES.include?(status)
+          @logger.info "retrying failed action with response code: #{status} (#{error})"
+          actions_to_retry << action
+        else
+          @logger.warn "Failed action.", status: status, action: action, response: response
+        end
+      end
+
+      actions_to_retry
+    end
+
+    # get the action parameters for the given event
+    def event_action_params(event)
+      type = get_event_type(event)
+
+      params = {
+        :_id => @document_id ? event.sprintf(@document_id) : nil,
+        :_index => event.sprintf(@index),
+        :_type => type,
+        :_routing => @routing ? event.sprintf(@routing) : nil
+      }
+
+      if @pipeline
+        params[:pipeline] = @pipeline
+      end
+
+      if @parent
+        params[:parent] = event.sprintf(@parent)
+      end
+
+      if @action == 'update'
+        params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @upsert != ""
+        params[:_script] = event.sprintf(@script) if @script != ""
+        params[:_retry_on_conflict] = @retry_on_conflict
+      end
+
+      params
+    end
+
+    # Determine the correct value for the 'type' field for the given event
+    def get_event_type(event)
+      # Set the 'type' value for the index.
+      type = if @document_type
+        event.sprintf(@document_type)
+      else
+        event.get("type") || "logs"
+      end
+
+      if !(type.is_a?(String) || type.is_a?(Numeric))
+        @logger.warn("Bad event type! Non-string/integer type value set!", :type_class => type.class, :type_value => type.to_s, :event => event)
+      end
+
+      type.to_s
+    end
+
+    # Rescue retryable errors during bulk submission
+    def safe_bulk(es_actions, actions)
+      @client.bulk(es_actions)
+    rescue Manticore::SocketException, Manticore::SocketTimeout => e
+      # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
+      # and let the user sort it out from there
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but Elasticsearch appears to be unreachable or down!",
+        :error_message => e.message,
+        :class => e.class.name,
+        :client_config => @client.client_options,
+      )
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      # We retry until there are no errors! Errors should all go to the retry queue
+      sleep @retry_max_interval
+      retry unless @stopping.true?
+    rescue => e
+      # For all other errors print out full connection issues
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but an error occurred and it failed! Are you sure you can reach elasticsearch from this machine using " +
+        "the configuration provided?",
+        :error_message => e.message,
+        :error_class => e.class.name,
+        :backtrace => e.backtrace,
+        :client_config => @client.client_options,
+      )
+
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      raise e
+    end
+  end
+end; end; end
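
To make the retry loop concrete, here is a small self-contained sketch (illustrative only, not code from the gem) of the partitioning that `submit` performs on a bulk response: 2xx items are done, items with the retryable codes 429/503 are collected for the next `retrying_submit` iteration, and everything else is logged and dropped. The response hash mimics the shape of an Elasticsearch bulk API reply:

    # Illustration of Common#submit's partitioning (assumed bulk-response shape).
    RETRYABLE_CODES = [429, 503]
    SUCCESS_CODES   = [200, 201]

    def partition_retries(actions, bulk_response)
      return [] unless bulk_response["errors"]
      retries = []
      bulk_response["items"].each_with_index do |response, idx|
        _action_type, props = response.first
        status = props["status"]
        if SUCCESS_CODES.include?(status)
          next
        elsif RETRYABLE_CODES.include?(status)
          retries << actions[idx] # resubmitted after sleeping retry_max_interval
        else
          puts "Failed action (status #{status}): #{props["error"]}"
        end
      end
      retries
    end

    actions = [["index", { :_id => "1" }, {}], ["index", { :_id => "2" }, {}]]
    response = {
      "errors" => true,
      "items" => [
        { "index" => { "status" => 201 } },                                              # created
        { "index" => { "status" => 429, "error" => "es_rejected_execution_exception" } } # retried
      ]
    }
    p partition_retries(actions, response).length # => 1 (only the 429 item)

Note that `retrying_submit` loops until `submit` returns nothing to retry, so a persistently retryable failure (for example, a full bulk queue returning 429) blocks the pipeline rather than dropping data.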
data/lib/logstash/outputs/elasticsearch/common_configs.rb
@@ -0,0 +1,164 @@
+module LogStash; module Outputs; class ElasticSearch
+  module CommonConfigs
+    def self.included(mod)
+      # The index to write events to. This can be dynamic using the `%{foo}` syntax.
+      # The default value will partition your indices by day so you can more easily
+      # delete old data or only search specific date ranges.
+      # Indexes may not contain uppercase characters.
+      # For weekly indexes ISO 8601 format is recommended, e.g. logstash-%{+xxxx.ww}.
+      # LS uses Joda to format the index pattern from the event timestamp.
+      # Joda formats are defined http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
+      mod.config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
+
+      # The index type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      #
+      # Deprecated in favor of the `document_type` field.
+      mod.config :index_type, :validate => :string, :obsolete => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
+
+      # The document type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      # Unless you set 'document_type', the event 'type' will be used if it exists,
+      # otherwise the document type will be assigned the value of 'logs'.
+      mod.config :document_type, :validate => :string
+
+      # Starting in Logstash 1.3 (unless you set option `manage_template` to false)
+      # a default mapping template for Elasticsearch will be applied, if you do not
+      # already have one set to match the index pattern defined (default of
+      # `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
+      # the template will be applied to all indices starting with `logstash-*`
+      #
+      # If you have dynamic templating (e.g. creating indices based on field names)
+      # then you should set `manage_template` to false and use the REST API to upload
+      # your templates manually.
+      mod.config :manage_template, :validate => :boolean, :default => true
+
+      # This configuration option defines how the template is named inside Elasticsearch.
+      # Note that if you have used the template management features and subsequently
+      # change this, you will need to prune the old template manually, e.g.
+      #
+      # `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
+      #
+      # where `OldTemplateName` is whatever the former setting was.
+      mod.config :template_name, :validate => :string, :default => "logstash"
+
+      # You can set the path to your own template here, if you so desire.
+      # If not set, the included template will be used.
+      mod.config :template, :validate => :path
+
+      # The template_overwrite option will always overwrite the indicated template
+      # in Elasticsearch with either the one indicated by template or the included one.
+      # This option is set to false by default. If you always want to stay up to date
+      # with the template provided by Logstash, this option could be very useful to you.
+      # Likewise, if you have your own template file managed by puppet, for example, and
+      # you wanted to be able to update it regularly, this option could help there as well.
+      #
+      # Please note that if you are using your own customized version of the Logstash
+      # template (logstash), setting this to true will make Logstash overwrite
+      # the "logstash" template (i.e. removing all customized settings)
+      mod.config :template_overwrite, :validate => :boolean, :default => false
+
+      # The document ID for the index. Useful for overwriting existing entries in
+      # Elasticsearch with the same ID.
+      mod.config :document_id, :validate => :string
+
+      # A routing override to be applied to all processed events.
+      # This can be dynamic using the `%{foo}` syntax.
+      mod.config :routing, :validate => :string
+
+      # For child documents, ID of the associated parent.
+      # This can be dynamic using the `%{foo}` syntax.
+      mod.config :parent, :validate => :string, :default => nil
+
+      # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
+      # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (e.g. 9200, not 9300).
+      # `"127.0.0.1"`
+      # `["127.0.0.1:9200","127.0.0.2:9200"]`
+      # `["http://127.0.0.1"]`
+      # `["https://127.0.0.1:9200"]`
+      # `["https://127.0.0.1:9200/mypath"]` (If using a proxy on a subpath)
+      # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+      # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
+      mod.config :hosts, :validate => :array, :default => ["127.0.0.1"]
+
+      mod.config :host, :obsolete => "Please use the 'hosts' setting instead. You can specify multiple entries separated by comma in 'host:port' format."
+
+      # The port setting is obsolete. Please use the 'hosts' setting instead.
+      # Hosts entries can be in "host:port" format.
+      mod.config :port, :obsolete => "Please use the 'hosts' setting instead. Hosts entries can be in 'host:port' format."
+
+      # This plugin uses the bulk index API for improved indexing performance.
+      # In Logstash 2.2 and later this setting defines the maximum size of a bulk request Logstash will make.
+      # You may want to increase this to be in line with your pipeline's batch size.
+      # If you specify a number larger than the batch size of your pipeline it will have no effect,
+      # save for the case where a filter increases the size of an inflight batch by outputting
+      # events.
+      #
+      # In Logstash 2.1 and earlier this plugin uses its own internal buffer of events.
+      # This config option sets that size. In these older versions this size may
+      # have a significant impact on heap usage, whereas in 2.2+ it will never increase it.
+      # To make efficient bulk API calls, we will buffer a certain number of
+      # events before flushing that out to Elasticsearch. This setting
+      # controls how many events will be buffered before sending a batch
+      # of events. Increasing the `flush_size` has an effect on Logstash's heap size.
+      # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big documents
+      # or have increased the `flush_size` to a higher value.
+      mod.config :flush_size, :validate => :number, :default => 500
+
+      # The amount of time since last flush before a flush is forced.
+      #
+      # This setting helps ensure slow event rates don't get stuck in Logstash.
+      # For example, if your `flush_size` is 100, and you have received 10 events,
+      # and it has been more than `idle_flush_time` seconds since the last flush,
+      # Logstash will flush those 10 events automatically.
+      #
+      # This helps keep both fast and slow log streams moving along in
+      # near-real-time.
+      mod.config :idle_flush_time, :validate => :number, :default => 1
+
+      # Set upsert content for update mode.
+      # Create a new document with this parameter as its JSON string if `document_id` doesn't exist.
+      mod.config :upsert, :validate => :string, :default => ""
+
+      # Enable `doc_as_upsert` for update mode.
+      # Create a new document with the source if `document_id` doesn't exist in Elasticsearch.
+      mod.config :doc_as_upsert, :validate => :boolean, :default => false
+
+      # DEPRECATED This setting no longer does anything. It will be marked obsolete in a future version.
+      mod.config :max_retries, :validate => :number, :default => 3
+
+      # Set script name for scripted update mode
+      mod.config :script, :validate => :string, :default => ""
+
+      # Define the type of script referenced by the "script" setting:
+      # inline  : "script" contains an inline script
+      # indexed : "script" contains the name of a script directly indexed in Elasticsearch
+      # file    : "script" contains the name of a script stored in Elasticsearch's config directory
+      mod.config :script_type, :validate => ["inline", "indexed", "file"], :default => ["inline"]
+
+      # Set the language of the used script
+      mod.config :script_lang, :validate => :string, :default => ""
+
+      # Set the variable name passed to the script (scripted update)
+      mod.config :script_var_name, :validate => :string, :default => "event"
+
+      # If enabled, the script is in charge of creating a non-existent document (scripted update)
+      mod.config :scripted_upsert, :validate => :boolean, :default => false
+
+      # Set the max interval between bulk retries.
+      mod.config :retry_max_interval, :validate => :number, :default => 2
+
+      # DEPRECATED This setting no longer does anything. If you need to change the number of retries in flight
+      # try increasing the total number of workers to better handle this.
+      mod.config :retry_max_items, :validate => :number, :default => 500, :deprecated => true
+
+      # The number of times Elasticsearch should internally retry an update/upserted document.
+      # See the https://www.elastic.co/guide/en/elasticsearch/guide/current/partial-updates.html[partial updates]
+      # documentation for more info.
+      mod.config :retry_on_conflict, :validate => :number, :default => 1
+
+      # Set which ingest pipeline you wish to execute for an event
+      mod.config :pipeline, :validate => :string, :default => nil
+    end
+  end
+end end end
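
Since several of these options revolve around sprintf-style `%{...}` expansion, a brief illustration of the default index pattern may help: `logstash-%{+YYYY.MM.dd}` partitions indices by the UTC day of each event's timestamp. Logstash expands the `%{+...}` date pattern with Joda against the event `@timestamp`; the strftime-based helper below is only a hypothetical stand-in to show the resulting index names:

    require "time"

    # Hypothetical illustration (not the plugin's code): what the default
    # "logstash-%{+YYYY.MM.dd}" pattern yields for a given event timestamp.
    def daily_index(timestamp)
      "logstash-#{timestamp.utc.strftime("%Y.%m.%d")}"
    end

    p daily_index(Time.parse("2016-03-01T23:59:00Z")) # => "logstash-2016.03.01"
    p daily_index(Time.parse("2016-03-02T00:01:00Z")) # => "logstash-2016.03.02"

Because the default template name (`logstash`) is applied to the index pattern minus its variables (`logstash-*`), each of these daily indices picks up the managed mapping template automatically unless `manage_template` is disabled.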