logstash-output-elasticsearch 0.1.6 → 3.0.0

Files changed (42)
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
data/lib/logstash/outputs/elasticsearch/buffer.rb
@@ -0,0 +1,124 @@
+require 'concurrent'
+java_import java.util.concurrent.locks.ReentrantLock
+
+module LogStash; module Outputs; class ElasticSearch
+  class Buffer
+    def initialize(logger, max_size, flush_interval, &block)
+      @logger = logger
+      # You need to acquire this for anything modifying state generally
+      @operations_mutex = Mutex.new
+      @operations_lock = java.util.concurrent.locks.ReentrantLock.new
+
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      @max_size = max_size
+      @submit_proc = block
+
+      @buffer = []
+
+      @last_flush = Time.now
+      @flush_interval = flush_interval
+      @flush_thread = spawn_interval_flusher
+    end
+
+    def push(item)
+      synchronize do |buffer|
+        push_unsafe(item)
+      end
+    end
+    alias_method :<<, :push
+
+    # Push multiple items onto the buffer in a single operation
+    def push_multi(items)
+      raise ArgumentError, "push_multi takes an Array, not a #{items.class}!" unless items.is_a?(Array)
+      synchronize do |buffer|
+        items.each {|item| push_unsafe(item) }
+      end
+    end
+
+    def flush
+      synchronize { flush_unsafe }
+    end
+
+    def stop(do_flush=true, wait_complete=true)
+      return if stopping?
+      @stopping.make_true
+
+      # No need to acquire a lock in this case
+      return if !do_flush && !wait_complete
+
+      synchronize do
+        flush_unsafe if do_flush
+        @flush_thread.join if wait_complete
+      end
+    end
+
+    def contents
+      synchronize {|buffer| buffer }
+    end
+
+    # For externally operating on the buffer contents:
+    # this takes a block, yields the internal buffer, and executes
+    # the block while holding the internal mutex
+    def synchronize
+      @operations_mutex.synchronize { yield(@buffer) }
+    end
+
+    # These methods are private for various reasons, chief among them thread safety!
+    # Many require the @operations_mutex to be locked to be safe
+    private
+
+    def push_unsafe(item)
+      @buffer << item
+      if @buffer.size >= @max_size
+        flush_unsafe
+      end
+    end
+
+    def spawn_interval_flusher
+      Thread.new do
+        loop do
+          sleep 0.2
+          break if stopping?
+          synchronize { interval_flush }
+        end
+      end
+    end
+
+    def interval_flush
+      if last_flush_seconds_ago >= @flush_interval
+        begin
+          @logger.debug? && @logger.debug("Flushing buffer at interval",
+            :instance => self.inspect,
+            :interval => @flush_interval)
+          flush_unsafe
+        rescue StandardError => e
+          @logger.warn("Error flushing buffer at interval!",
+            :instance => self.inspect,
+            :message => e.message,
+            :class => e.class.name,
+            :backtrace => e.backtrace
+          )
+        rescue Exception => e
+          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
+        end
+      end
+    end
+
+    def flush_unsafe
+      if @buffer.size > 0
+        @submit_proc.call(@buffer)
+        @buffer.clear
+      end
+
+      @last_flush = Time.now # This must always be set to ensure correct timer behavior
+    end
+
+    def last_flush_seconds_ago
+      Time.now - @last_flush
+    end
+
+    def stopping?
+      @stopping.true?
+    end
+  end
+end end end
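
For orientation, here is a minimal usage sketch of the Buffer above. It assumes JRuby with the concurrent-ruby gem (the class uses java_import and Concurrent::AtomicBoolean); the StubLogger and the threshold values are hypothetical, standing in for Logstash's logger API (a message plus a fields hash):

# Hypothetical harness for Buffer; not part of the plugin.
class StubLogger
  def debug?; false; end
  def debug(message, fields = {}); end
  def warn(message, fields = {}); $stderr.puts("WARN #{message} #{fields}"); end
end

# Flush when 3 items accumulate, or 1 second after the last flush.
buffer = LogStash::Outputs::ElasticSearch::Buffer.new(StubLogger.new, 3, 1) do |items|
  # In the plugin, setup_buffer_and_handler (common.rb below) passes a block
  # that calls retrying_submit(actions); here we just report the flush.
  puts "flushing #{items.size} item(s)"
end

buffer << :a                 # buffered: size 1 < max_size
buffer.push_multi([:b, :c])  # reaches max_size of 3, so it flushes under the lock
sleep 1.5                    # background thread runs interval_flush (buffer now empty)
buffer.stop                  # final flush, then joins the flusher thread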
data/lib/logstash/outputs/elasticsearch/common.rb
@@ -0,0 +1,205 @@
+require "logstash/outputs/elasticsearch/template_manager"
+require "logstash/outputs/elasticsearch/buffer"
+
+module LogStash; module Outputs; class ElasticSearch;
+  module Common
+    attr_reader :client, :hosts
+
+    RETRYABLE_CODES = [429, 503]
+    SUCCESS_CODES = [200, 201]
+
+    def register
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      setup_hosts # properly sets @hosts
+      build_client
+      install_template
+      setup_buffer_and_handler
+      check_action_validity
+
+      @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts)
+    end
+
+    def receive(event)
+      @buffer << event_action_tuple(event)
+    end
+
+    # Receive an array of events and immediately attempt to index them (no buffering)
+    def multi_receive(events)
+      events.each_slice(@flush_size) do |slice|
+        retrying_submit(slice.map {|e| event_action_tuple(e) })
+      end
+    end
+
+    # Convert the event into a 3-tuple of action, params, and event
+    def event_action_tuple(event)
+      params = event_action_params(event)
+      action = event.sprintf(@action)
+      [action, params, event]
+    end
+
+    def flush
+      @buffer.flush
+    end
+
+    def setup_hosts
+      @hosts = Array(@hosts)
+      if @hosts.empty?
+        @logger.info("No 'host' set in elasticsearch output. Defaulting to localhost")
+        @hosts.replace(["localhost"])
+      end
+    end
+
+    def install_template
+      TemplateManager.install_template(self)
+    end
+
+    def setup_buffer_and_handler
+      @buffer = ::LogStash::Outputs::ElasticSearch::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions|
+        retrying_submit(actions)
+      end
+    end
+
+    def check_action_validity
+      raise LogStash::ConfigurationError, "No action specified!" unless @action
+
+      # If we're using string interpolation, we're good!
+      return if @action =~ /%{.+}/
+      return if valid_actions.include?(@action)
+
+      raise LogStash::ConfigurationError, "Action '#{@action}' is invalid! Pick one of #{valid_actions} or use a sprintf style statement"
+    end
+
+    # To be overridden by the -java version
+    VALID_HTTP_ACTIONS = ["index", "delete", "create", "update"]
+    def valid_actions
+      VALID_HTTP_ACTIONS
+    end
+
+    def retrying_submit(actions)
+      # Initially we submit the full list of actions
+      submit_actions = actions
+
+      while submit_actions && submit_actions.length > 0
+        return if !submit_actions || submit_actions.empty? # If everything's a success we move along
+        # We retry with whatever didn't succeed
+        begin
+          submit_actions = submit(submit_actions)
+        rescue => e
+          @logger.warn("Encountered an unexpected error submitting a bulk request! Will retry.",
+            :message => e.message,
+            :class => e.class.name,
+            :backtrace => e.backtrace)
+        end
+
+        sleep @retry_max_interval if submit_actions && submit_actions.length > 0
+      end
+    end
+
+    def submit(actions)
+      es_actions = actions.map { |a, doc, event| [a, doc, event.to_hash] }
+
+      bulk_response = safe_bulk(es_actions, actions)
+
+      # If there are no errors, we're done here!
+      return unless bulk_response["errors"]
+
+      actions_to_retry = []
+      bulk_response["items"].each_with_index do |response, idx|
+        action_type, action_props = response.first
+        status = action_props["status"]
+        error = action_props["error"]
+        action = actions[idx]
+
+        if SUCCESS_CODES.include?(status)
+          next
+        elsif RETRYABLE_CODES.include?(status)
+          @logger.info "retrying failed action with response code: #{status} (#{error})"
+          actions_to_retry << action
+        else
+          @logger.warn "Failed action.", status: status, action: action, response: response
+        end
+      end
+
+      actions_to_retry
+    end
+
+    # Get the action parameters for the given event
+    def event_action_params(event)
+      type = get_event_type(event)
+
+      params = {
+        :_id => @document_id ? event.sprintf(@document_id) : nil,
+        :_index => event.sprintf(@index),
+        :_type => type,
+        :_routing => @routing ? event.sprintf(@routing) : nil
+      }
+
+      if @pipeline
+        params[:pipeline] = @pipeline
+      end
+
+      if @parent
+        params[:parent] = event.sprintf(@parent)
+      end
+
+      if @action == 'update'
+        params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @upsert != ""
+        params[:_script] = event.sprintf(@script) if @script != ""
+        params[:_retry_on_conflict] = @retry_on_conflict
+      end
+
+      params
+    end
+
+    # Determine the correct value for the 'type' field for the given event
+    def get_event_type(event)
+      # Set the 'type' value for the index.
+      type = if @document_type
+               event.sprintf(@document_type)
+             else
+               event.get("type") || "logs"
+             end
+
+      if !(type.is_a?(String) || type.is_a?(Numeric))
+        @logger.warn("Bad event type! Non-string/integer type value set!", :type_class => type.class, :type_value => type.to_s, :event => event)
+      end
+
+      type.to_s
+    end
+
+    # Rescue retryable errors during bulk submission
+    def safe_bulk(es_actions, actions)
+      @client.bulk(es_actions)
+    rescue Manticore::SocketException, Manticore::SocketTimeout => e
+      # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
+      # and let the user sort it out from there
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but Elasticsearch appears to be unreachable or down!",
+        :error_message => e.message,
+        :class => e.class.name,
+        :client_config => @client.client_options,
+      )
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      # We retry until there are no errors! Errors should all go to the retry queue
+      sleep @retry_max_interval
+      retry unless @stopping.true?
+    rescue => e
+      # For all other errors print out full connection issues
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but an error occurred and it failed! Are you sure you can reach elasticsearch from this machine using " +
+        "the configuration provided?",
+        :error_message => e.message,
+        :error_class => e.class.name,
+        :backtrace => e.backtrace,
+        :client_config => @client.client_options,
+      )
+
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      raise e
+    end
+  end
+end; end; end
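
For reference when reading submit above: the Elasticsearch bulk API reports per-item results in an "items" array positionally aligned with the submitted actions, each entry being a one-key hash whose key is the action type. Here is a representative response shape (illustrative values, not captured output) and a mirror of the partitioning logic:

# Illustrative bulk response shape; values are made up.
bulk_response = {
  "took"   => 30,
  "errors" => true,   # submit returns early unless this is truthy
  "items"  => [
    # 201 is in SUCCESS_CODES => skipped
    { "index" => { "_index" => "logstash-2016.03.01", "status" => 201 } },
    # 429 is in RETRYABLE_CODES => the matching action is re-queued
    { "index" => { "status" => 429, "error" => "es_rejected_execution_exception" } },
    # anything else (e.g. a 400 mapping error) is logged with warn and dropped
    { "index" => { "status" => 400, "error" => "mapper_parsing_exception" } }
  ]
}

# Mirror of the loop in submit: only index 1 would be retried.
retryable = bulk_response["items"].each_with_index.select do |item, _idx|
  _action_type, props = item.first
  [429, 503].include?(props["status"])
end.map(&:last)
# => [1]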
data/lib/logstash/outputs/elasticsearch/common_configs.rb
@@ -0,0 +1,164 @@
+module LogStash; module Outputs; class ElasticSearch
+  module CommonConfigs
+    def self.included(mod)
+      # The index to write events to. This can be dynamic using the `%{foo}` syntax.
+      # The default value will partition your indices by day so you can more easily
+      # delete old data or only search specific date ranges.
+      # Indexes may not contain uppercase characters.
+      # For weekly indexes ISO 8601 format is recommended, e.g. logstash-%{+xxxx.ww}.
+      # LS uses Joda to format the index pattern from the event timestamp.
+      # Joda formats are defined http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html[here].
+      mod.config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
+
+      # The index type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      #
+      # Deprecated in favor of the `document_type` setting.
+      mod.config :index_type, :validate => :string, :obsolete => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
+
+      # The document type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      # Unless you set 'document_type', the event 'type' will be used if it exists,
+      # otherwise the document type will be assigned the value of 'logs'.
+      mod.config :document_type, :validate => :string
+
+      # Starting in Logstash 1.3 (unless you set option `manage_template` to false)
+      # a default mapping template for Elasticsearch will be applied, if you do not
+      # already have one set to match the index pattern defined (default of
+      # `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
+      # the template will be applied to all indices starting with `logstash-*`.
+      #
+      # If you have dynamic templating (e.g. creating indices based on field names)
+      # then you should set `manage_template` to false and use the REST API to upload
+      # your templates manually.
+      mod.config :manage_template, :validate => :boolean, :default => true
+
+      # This configuration option defines how the template is named inside Elasticsearch.
+      # Note that if you have used the template management features and subsequently
+      # change this, you will need to prune the old template manually, e.g.
+      #
+      # `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
+      #
+      # where `OldTemplateName` is whatever the former setting was.
+      mod.config :template_name, :validate => :string, :default => "logstash"
+
+      # You can set the path to your own template here, if you so desire.
+      # If not set, the included template will be used.
+      mod.config :template, :validate => :path
+
+      # The template_overwrite option will always overwrite the indicated template
+      # in Elasticsearch with either the one indicated by template or the included one.
+      # This option is set to false by default. If you always want to stay up to date
+      # with the template provided by Logstash, this option could be very useful to you.
+      # Likewise, if you have your own template file managed by puppet, for example, and
+      # you wanted to be able to update it regularly, this option could help there as well.
+      #
+      # Please note that if you are using your own customized version of the Logstash
+      # template (logstash), setting this to true will make Logstash overwrite
+      # the "logstash" template (i.e. removing all customized settings).
+      mod.config :template_overwrite, :validate => :boolean, :default => false
+
+      # The document ID for the index. Useful for overwriting existing entries in
+      # Elasticsearch with the same ID.
+      mod.config :document_id, :validate => :string
+
+      # A routing override to be applied to all processed events.
+      # This can be dynamic using the `%{foo}` syntax.
+      mod.config :routing, :validate => :string
+
+      # For child documents, ID of the associated parent.
+      # This can be dynamic using the `%{foo}` syntax.
+      mod.config :parent, :validate => :string, :default => nil
+
+      # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
+      # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (e.g. 9200, not 9300).
+      # `"127.0.0.1"`
+      # `["127.0.0.1:9200","127.0.0.2:9200"]`
+      # `["http://127.0.0.1"]`
+      # `["https://127.0.0.1:9200"]`
+      # `["https://127.0.0.1:9200/mypath"]` (If using a proxy on a subpath)
+      # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+      # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
+      mod.config :hosts, :validate => :array, :default => ["127.0.0.1"]
+
+      mod.config :host, :obsolete => "Please use the 'hosts' setting instead. You can specify multiple entries separated by comma in 'host:port' format."
+
+      # The port setting is obsolete. Please use the 'hosts' setting instead.
+      # Hosts entries can be in "host:port" format.
+      mod.config :port, :obsolete => "Please use the 'hosts' setting instead. Hosts entries can be in 'host:port' format."
+
+      # This plugin uses the bulk index API for improved indexing performance.
+      # In Logstash 2.2 and later this setting defines the maximum size of a bulk request Logstash will make.
+      # You may want to increase this to be in line with your pipeline's batch size.
+      # If you specify a number larger than the batch size of your pipeline it will have no effect,
+      # save for the case where a filter increases the size of an in-flight batch by outputting
+      # events.
+      #
+      # In Logstash 2.1 and earlier this plugin uses its own internal buffer of events.
+      # This config option sets that size. In these older Logstashes this size may
+      # have a significant impact on heap usage, whereas in 2.2+ it will never increase it.
+      # To make efficient bulk API calls, we will buffer a certain number of
+      # events before flushing that out to Elasticsearch. This setting
+      # controls how many events will be buffered before sending a batch
+      # of events. Increasing the `flush_size` has an effect on Logstash's heap size.
+      # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big documents
+      # or have increased the `flush_size` to a higher value.
+      mod.config :flush_size, :validate => :number, :default => 500
+
+      # The amount of time since last flush before a flush is forced.
+      #
+      # This setting helps ensure slow event rates don't get stuck in Logstash.
+      # For example, if your `flush_size` is 100, and you have received 10 events,
+      # and it has been more than `idle_flush_time` seconds since the last flush,
+      # Logstash will flush those 10 events automatically.
+      #
+      # This helps keep both fast and slow log streams moving along in
+      # near-real-time.
+      mod.config :idle_flush_time, :validate => :number, :default => 1
+
+      # Set upsert content for update mode.
+      # Create a new document with this parameter as its JSON string if `document_id` doesn't exist.
+      mod.config :upsert, :validate => :string, :default => ""
+
+      # Enable `doc_as_upsert` for update mode.
+      # Create a new document with source if `document_id` doesn't exist in Elasticsearch.
+      mod.config :doc_as_upsert, :validate => :boolean, :default => false
+
+      # DEPRECATED This setting no longer does anything. It will be marked obsolete in a future version.
+      mod.config :max_retries, :validate => :number, :default => 3
+
+      # Set script name for scripted update mode
+      mod.config :script, :validate => :string, :default => ""
+
+      # Define the type of script referenced by the "script" setting:
+      # inline  : "script" contains an inline script
+      # indexed : "script" contains the name of a script directly indexed in Elasticsearch
+      # file    : "script" contains the name of a script stored in Elasticsearch's config directory
+      mod.config :script_type, :validate => ["inline", "indexed", "file"], :default => ["inline"]
+
+      # Set the language of the used script
+      mod.config :script_lang, :validate => :string, :default => ""
+
+      # Set variable name passed to script (scripted update)
+      mod.config :script_var_name, :validate => :string, :default => "event"
+
+      # If enabled, the script is in charge of creating a non-existent document (scripted update)
+      mod.config :scripted_upsert, :validate => :boolean, :default => false
+
+      # Set max interval between bulk retries.
+      mod.config :retry_max_interval, :validate => :number, :default => 2
+
+      # DEPRECATED This setting no longer does anything. If you need to change the number of retries in flight
+      # try increasing the total number of workers to better handle this.
+      mod.config :retry_max_items, :validate => :number, :default => 500, :deprecated => true
+
+      # The number of times Elasticsearch should internally retry an updated/upserted document.
+      # See the https://www.elastic.co/guide/en/elasticsearch/guide/current/partial-updates.html[partial updates]
+      # documentation for more info.
+      mod.config :retry_on_conflict, :validate => :number, :default => 1
+
+      # Set which ingest pipeline you wish to execute for an event
+      mod.config :pipeline, :validate => :string, :default => nil
+    end
+  end
+end end end
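
Taken together, a pipeline configuration exercising these settings might look like the sketch below. The host address, index pattern, document ID field, and script name are placeholders, and the commented settings only take effect with action => "update":

output {
  elasticsearch {
    hosts           => ["http://127.0.0.1:9200"]  # data/client nodes only, never dedicated masters
    index           => "logstash-%{+YYYY.MM.dd}"  # daily indices via Joda-style date interpolation
    flush_size      => 500                        # cap on events per bulk request (2.2+ semantics)
    idle_flush_time => 1                          # force a flush after 1s without one
    # Scripted-update settings, meaningful only when action => "update":
    # action            => "update"
    # document_id       => "%{id}"
    # script            => "my-update-script"
    # script_type      => "file"
    # retry_on_conflict => 3
  }
}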