logstash-output-elasticsearch 2.1.2-java → 2.1.4-java

@@ -0,0 +1,124 @@
+require 'concurrent'
+java_import java.util.concurrent.locks.ReentrantLock
+
+module LogStash; module Outputs; class ElasticSearch
+  class Buffer
+    def initialize(logger, max_size, flush_interval, &block)
+      @logger = logger
+      # You need to acquire this for anything modifying state generally
+      @operations_mutex = Mutex.new
+      @operations_lock = java.util.concurrent.locks.ReentrantLock.new
+
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      @max_size = max_size
+      @submit_proc = block
+
+      @buffer = []
+
+      @last_flush = Time.now
+      @flush_interval = flush_interval
+      @flush_thread = spawn_interval_flusher
+    end
+
+    def push(item)
+      synchronize do |buffer|
+        push_unsafe(item)
+      end
+    end
+    alias_method :<<, :push
+
+    # Push multiple items onto the buffer in a single operation
+    def push_multi(items)
+      raise ArgumentError, "push_multi takes an Array, not a #{items.class}!" unless items.is_a?(Array)
+      synchronize do |buffer|
+        items.each {|item| push_unsafe(item) }
+      end
+    end
+
+    def flush
+      synchronize { flush_unsafe }
+    end
+
+    def stop(do_flush=true, wait_complete=true)
+      return if stopping?
+      @stopping.make_true
+
+      # No need to acquire a lock in this case
+      return if !do_flush && !wait_complete
+
+      synchronize do
+        flush_unsafe if do_flush
+        @flush_thread.join if wait_complete
+      end
+    end
+
+    def contents
+      synchronize {|buffer| buffer}
+    end
+
+    # For externally operating on the buffer contents:
+    # this takes a block, yields the internal buffer, and executes
+    # the block while holding the internal mutex
+    def synchronize
+      @operations_mutex.synchronize { yield(@buffer) }
+    end
+
+    # These methods are private for various reasons, chief among them thread safety!
+    # Many require the @operations_mutex to be locked to be safe
+    private
+
+    def push_unsafe(item)
+      @buffer << item
+      if @buffer.size >= @max_size
+        flush_unsafe
+      end
+    end
+
+    def spawn_interval_flusher
+      Thread.new do
+        loop do
+          sleep 0.2
+          break if stopping?
+          synchronize { interval_flush }
+        end
+      end
+    end
+
+    def interval_flush
+      if last_flush_seconds_ago >= @flush_interval
+        begin
+          @logger.info? && @logger.info("Flushing buffer at interval",
+                                        :instance => self.inspect,
+                                        :interval => @flush_interval)
+          flush_unsafe
+        rescue StandardError => e
+          @logger.warn("Error flushing buffer at interval!",
+                       :instance => self.inspect,
+                       :message => e.message,
+                       :class => e.class.name,
+                       :backtrace => e.backtrace
+          )
+        rescue Exception => e
+          @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name)
+        end
+      end
+    end
+
+    def flush_unsafe
+      if @buffer.size > 0
+        @submit_proc.call(@buffer)
+        @buffer.clear
+      end
+
+      @last_flush = Time.now # This must always be set to ensure correct timer behavior
+    end
+
+    def last_flush_seconds_ago
+      Time.now - @last_flush
+    end
+
+    def stopping?
+      @stopping.true?
+    end
+  end
+end end end
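
The Buffer above flushes on two triggers: size (push_unsafe calls flush_unsafe once @buffer.size reaches max_size) and time (the flusher thread wakes every 200 ms and flushes once flush_interval seconds have elapsed since @last_flush). A minimal usage sketch, assuming a Cabin-style logger; the values and block body are illustrative, not from the plugin:

    require "logstash/outputs/elasticsearch/buffer"
    require "cabin"

    logger = Cabin::Channel.get
    # Flush every 3 items, or after 1 second of inactivity.
    buffer = LogStash::Outputs::ElasticSearch::Buffer.new(logger, 3, 1) do |items|
      # @submit_proc receives the internal array itself, which is cleared right
      # after this block returns -- copy it if you keep a reference.
      logger.info("flushing", :count => items.size)
    end

    buffer << "a"
    buffer.push_multi(["b", "c"])  # reaches max_size, so this triggers a size-based flush
    sleep 1.5                      # the flusher thread flushes anything buffered after 1 idle second
    buffer.stop                    # final flush, then joins the flusher thread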
@@ -0,0 +1,173 @@
+require "logstash/outputs/elasticsearch/template_manager"
+require "logstash/outputs/elasticsearch/buffer"
+
+module LogStash; module Outputs; class ElasticSearch;
+  module Common
+    attr_reader :client, :hosts
+
+    RETRYABLE_CODES = [409, 429, 503]
+    SUCCESS_CODES = [200, 201]
+
+    def register
+      @stopping = Concurrent::AtomicBoolean.new(false)
+      setup_hosts # properly sets @hosts
+      build_client
+      install_template
+      setup_buffer_and_handler
+
+      @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts)
+    end
+
+    def receive(event)
+      @buffer << event_action_tuple(event)
+    end
+
+    # Receive an array of events and immediately attempt to index them (no buffering)
+    def multi_receive(events)
+      retrying_submit(events.map {|e| event_action_tuple(e) })
+    end
+
+    # Convert the event into a 3-tuple of action, params, and event
+    def event_action_tuple(event)
+      params = event_action_params(event)
+      action = event.sprintf(@action)
+      [action, params, event]
+    end
+
+    def flush
+      @buffer.flush
+    end
+
+    def setup_hosts
+      @hosts = Array(@hosts)
+      if @hosts.empty?
+        @logger.info("No 'host' set in elasticsearch output. Defaulting to localhost")
+        @hosts.replace(["localhost"])
+      end
+    end
+
+    def install_template
+      TemplateManager.install_template(self)
+    end
+
+    def setup_buffer_and_handler
+      @buffer = ::LogStash::Outputs::ElasticSearch::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions|
+        retrying_submit(actions)
+      end
+    end
+
+    def retrying_submit(actions)
+      retries_left = @max_retries + 1 # +1 for the first attempt
+
+      # Initially we submit the full list of actions
+      submit_actions = actions
+
+      while submit_actions && submit_actions.length > 0 && retries_left > 0
+        return if !submit_actions || submit_actions.empty? # If everything's a success we move along
+        # We retry with whatever didn't succeed
+        begin
+          submit_actions = submit(submit_actions)
+        rescue => e
+          @logger.warn("Encountered an unexpected error submitting a bulk request!",
+                       :message => e.message,
+                       :class => e.class.name,
+                       :backtrace => e.backtrace)
+        end
+        retries_left -= 1
+      end
+    end
+
+    def submit(actions)
+      es_actions = actions.map { |a, doc, event| [a, doc, event.to_hash] }
+
+      bulk_response = safe_bulk(es_actions, actions)
+
+      # If there are no errors, we're done here!
+      return unless bulk_response["errors"]
+
+      actions_to_retry = []
+      bulk_response["items"].each_with_index do |response, idx|
+        action_type, action_props = response.first
+        status = action_props["status"]
+        action = actions[idx]
+
+        if SUCCESS_CODES.include?(status)
+          next
+        elsif RETRYABLE_CODES.include?(status)
+          @logger.warn "retrying failed action with response code: #{status}"
+          actions_to_retry << action
+        else
+          @logger.warn("Failed action.", :status => status, :action => action, :response => response)
+        end
+      end
+
+      actions_to_retry
+    end
+
+    # Get the action parameters for the given event
+    def event_action_params(event)
+      type = get_event_type(event)
+
+      params = {
+        :_id => @document_id ? event.sprintf(@document_id) : nil,
+        :_index => event.sprintf(@index),
+        :_type => type,
+        :_routing => @routing ? event.sprintf(@routing) : nil
+      }

+      params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @action == 'update' && @upsert != ""
+      params
+    end
+
+    # Determine the correct value for the 'type' field for the given event
+    def get_event_type(event)
+      # Set the 'type' value for the index.
+      type = if @document_type
+               event.sprintf(@document_type)
+             else
+               event["type"] || "logs"
+             end
+
+      if !(type.is_a?(String) || type.is_a?(Numeric))
+        @logger.warn("Bad event type! Non-string/integer type value set!", :type_class => type.class, :type_value => type.to_s, :event => event)
+      end
+
+      type.to_s
+    end
+
+    # Rescue retryable errors during bulk submission
+    def safe_bulk(es_actions, actions)
+      @client.bulk(es_actions)
+    rescue Manticore::SocketException, Manticore::SocketTimeout => e
+      # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
+      # and let the user sort it out from there
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but Elasticsearch appears to be unreachable or down!",
+        :client_config => @client.client_options,
+        :error_message => e.message,
+        :class => e.class.name
+      )
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      # We retry until there are no errors! Errors should all go to the retry queue
+      sleep @retry_max_interval
+      retry unless @stopping.true?
+    rescue => e
+      # For all other errors print out full connection issues
+      @logger.error(
+        "Attempted to send a bulk request to Elasticsearch configured at '#{@client.client_options[:hosts]}'," +
+        " but an error occurred and it failed! Are you sure you can reach elasticsearch from this machine using " +
+        "the configuration provided?",
+        :client_config => @client.client_options,
+        :error_message => e.message,
+        :error_class => e.class.name,
+        :backtrace => e.backtrace
+      )
+
+      @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+
+      raise e
+    end
+  end
+end; end; end
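
Each unit of work here is the 3-tuple built by event_action_tuple, and submit returns only the tuples whose per-item status was retryable (409/429/503), or nil when the whole bulk succeeded, which the while condition also handles. Each pass of the loop in retrying_submit therefore operates on a shrinking worklist. A sketch of that narrowing with the Elasticsearch client stubbed out; statuses and document IDs are invented for illustration:

    # Stand-in for submit(): keep only the actions whose simulated status is retryable.
    def submit_stub(actions, statuses)
      actions.select { |_action, params, _event| [409, 429, 503].include?(statuses[params[:_id]]) }
    end

    statuses = { "a" => 201, "b" => 429, "c" => 503 }
    actions  = [
      ["index", { :_id => "a" }, { "message" => "ok" }],
      ["index", { :_id => "b" }, { "message" => "throttled" }],
      ["index", { :_id => "c" }, { "message" => "unavailable" }],
    ]

    retries_left = 3 + 1  # mirrors @max_retries + 1
    while actions && actions.length > 0 && retries_left > 0
      actions = submit_stub(actions, statuses)
      statuses = Hash.new(201)  # pretend the cluster recovered after the first pass
      retries_left -= 1
    end
    # actions is now empty: "a" succeeded immediately; "b" and "c" were retried once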
@@ -0,0 +1,113 @@
+module LogStash; module Outputs; class ElasticSearch
+  module CommonConfigs
+    def self.included(mod)
+      # The index to write events to. This can be dynamic using the `%{foo}` syntax.
+      # The default value will partition your indices by day so you can more easily
+      # delete old data or only search specific date ranges.
+      # Indexes may not contain uppercase characters.
+      # For weekly indexes ISO 8601 format is recommended, e.g. logstash-%{+xxxx.ww}
+      mod.config :index, :validate => :string, :default => "logstash-%{+YYYY.MM.dd}"
+
+      # The index type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      #
+      # Deprecated in favor of the `document_type` setting.
+      mod.config :index_type, :validate => :string, :obsolete => "Please use the 'document_type' setting instead. It has the same effect, but is more appropriately named."
+
+      # The document type to write events to. Generally you should try to write only
+      # similar events to the same 'type'. String expansion `%{foo}` works here.
+      # Unless you set 'document_type', the event 'type' will be used if it exists;
+      # otherwise the document type will be assigned the value of 'logs'
+      mod.config :document_type, :validate => :string
+
+      # Starting in Logstash 1.3 (unless you set option `manage_template` to false)
+      # a default mapping template for Elasticsearch will be applied, if you do not
+      # already have one set to match the index pattern defined (default of
+      # `logstash-%{+YYYY.MM.dd}`), minus any variables. For example, in this case
+      # the template will be applied to all indices starting with `logstash-*`
+      #
+      # If you have dynamic templating (e.g. creating indices based on field names)
+      # then you should set `manage_template` to false and use the REST API to upload
+      # your templates manually.
+      mod.config :manage_template, :validate => :boolean, :default => true
+
+      # This configuration option defines how the template is named inside Elasticsearch.
+      # Note that if you have used the template management features and subsequently
+      # change this, you will need to prune the old template manually, e.g.
+      #
+      # `curl -XDELETE <http://localhost:9200/_template/OldTemplateName?pretty>`
+      #
+      # where `OldTemplateName` is whatever the former setting was.
+      mod.config :template_name, :validate => :string, :default => "logstash"
+
+      # You can set the path to your own template here, if you so desire.
+      # If not set, the included template will be used.
+      mod.config :template, :validate => :path
+
+      # Overwrite the current template with whatever is configured
+      # in the `template` and `template_name` directives.
+      mod.config :template_overwrite, :validate => :boolean, :default => false
+
+      # The document ID for the index. Useful for overwriting existing entries in
+      # Elasticsearch with the same ID.
+      mod.config :document_id, :validate => :string
+
+      # A routing override to be applied to all processed events.
+      # This can be dynamic using the `%{foo}` syntax.
+      mod.config :routing, :validate => :string
+
+      # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
+      # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (e.g. 9200, not 9300).
+      #     `"127.0.0.1"`
+      #     `["127.0.0.1:9200","127.0.0.2:9200"]`
+      # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+      # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
+      mod.config :hosts, :validate => :array, :default => ["127.0.0.1"]
+
+      mod.config :host, :obsolete => "Please use the 'hosts' setting instead. You can specify multiple entries separated by comma in 'host:port' format."
+
+      # The port setting is obsolete. Please use the 'hosts' setting instead.
+      # Hosts entries can be in "host:port" format.
+      mod.config :port, :obsolete => "Please use the 'hosts' setting instead. Hosts entries can be in 'host:port' format."
+
+      # This plugin uses the bulk index API for improved indexing performance.
+      # To make efficient bulk API calls, we will buffer a certain number of
+      # events before flushing that out to Elasticsearch. This setting
+      # controls how many events will be buffered before sending a batch
+      # of events. Increasing the `flush_size` has an effect on Logstash's heap size.
+      # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big documents
+      # or have increased the `flush_size` to a higher value.
+      mod.config :flush_size, :validate => :number, :default => 500
+
+      # The amount of time since last flush before a flush is forced.
+      #
+      # This setting helps ensure slow event rates don't get stuck in Logstash.
+      # For example, if your `flush_size` is 100, and you have received 10 events,
+      # and it has been more than `idle_flush_time` seconds since the last flush,
+      # Logstash will flush those 10 events automatically.
+      #
+      # This helps keep both fast and slow log streams moving along in
+      # near-real-time.
+      mod.config :idle_flush_time, :validate => :number, :default => 1
+
+      # Set upsert content for update mode.
+      # Create a new document with this parameter as json string if `document_id` doesn't exist
+      mod.config :upsert, :validate => :string, :default => ""
+
+      # Enable `doc_as_upsert` for update mode.
+      # Create a new document with source if `document_id` doesn't exist in Elasticsearch
+      mod.config :doc_as_upsert, :validate => :boolean, :default => false
+
+      # Set max retries for each event. The total time spent blocked on retries will be
+      # (max_retries * retry_max_interval). This may vary a bit if Elasticsearch is very slow to respond
+      mod.config :max_retries, :validate => :number, :default => 3
+
+      # Set max interval between bulk retries.
+      mod.config :retry_max_interval, :validate => :number, :default => 2
+
+      # DEPRECATED This setting no longer does anything. If you need to change the number of retries in flight
+      # try increasing the total number of workers to better handle this.
+      mod.config :retry_max_items, :validate => :number, :default => 500, :deprecated => true
+    end
+  end
+end end end
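
Taken together, these options map onto settings in the elasticsearch block of a pipeline config. A hedged example pipeline using them (values are illustrative, mostly the defaults documented above):

    output {
      elasticsearch {
        hosts              => ["127.0.0.1:9200", "127.0.0.2:9200"]
        index              => "logstash-%{+YYYY.MM.dd}"
        document_type      => "%{type}"
        flush_size         => 500
        idle_flush_time    => 1
        max_retries        => 3
        retry_max_interval => 2
      }
    }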
@@ -5,37 +5,91 @@
   },
   "mappings" : {
     "_default_" : {
-      "_all" : {"enabled" : true, "omit_norms" : true},
-      "dynamic_templates" : [ {
-        "message_field" : {
-          "match" : "message",
-          "match_mapping_type" : "string",
-          "mapping" : {
-            "type" : "string", "index" : "analyzed", "omit_norms" : true
-          }
-        }
-      }, {
-        "string_fields" : {
-          "match" : "*",
-          "match_mapping_type" : "string",
-          "mapping" : {
-            "type" : "string", "index" : "analyzed", "omit_norms" : true,
-            "fields" : {
-              "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
-            }
-          }
-        }
-      } ],
-      "properties" : {
-        "@version": { "type": "string", "index": "not_analyzed" },
-        "geoip" : {
-          "type" : "object",
-          "dynamic": true,
-          "properties" : {
-            "location" : { "type" : "geo_point" }
-          }
-        }
-      }
+      "_all" : {"enabled" : true, "omit_norms" : true},
+      "dynamic_templates" : [ {
+        "message_field" : {
+          "match" : "message",
+          "match_mapping_type" : "string",
+          "mapping" : {
+            "type" : "string", "index" : "analyzed", "omit_norms" : true,
+            "fielddata" : { "format" : "disabled" }
+          }
+        }
+      }, {
+        "string_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "string",
+          "mapping" : {
+            "type" : "string", "index" : "analyzed", "omit_norms" : true,
+            "fielddata" : { "format" : "disabled" },
+            "fields" : {
+              "raw" : {"type": "string", "index" : "not_analyzed", "doc_values" : true, "ignore_above" : 256}
+            }
+          }
+        }
+      }, {
+        "float_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "float",
+          "mapping" : { "type" : "float", "doc_values" : true }
+        }
+      }, {
+        "double_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "double",
+          "mapping" : { "type" : "double", "doc_values" : true }
+        }
+      }, {
+        "byte_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "byte",
+          "mapping" : { "type" : "byte", "doc_values" : true }
+        }
+      }, {
+        "short_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "short",
+          "mapping" : { "type" : "short", "doc_values" : true }
+        }
+      }, {
+        "integer_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "integer",
+          "mapping" : { "type" : "integer", "doc_values" : true }
+        }
+      }, {
+        "long_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "long",
+          "mapping" : { "type" : "long", "doc_values" : true }
+        }
+      }, {
+        "date_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "date",
+          "mapping" : { "type" : "date", "doc_values" : true }
+        }
+      }, {
+        "geo_point_fields" : {
+          "match" : "*",
+          "match_mapping_type" : "geo_point",
+          "mapping" : { "type" : "geo_point", "doc_values" : true }
+        }
+      } ],
+      "properties" : {
+        "@timestamp": { "type": "date", "doc_values" : true },
+        "@version": { "type": "string", "index": "not_analyzed", "doc_values" : true },
+        "geoip" : {
+          "type" : "object",
+          "dynamic": true,
+          "properties" : {
+            "ip": { "type": "ip", "doc_values" : true },
+            "location" : { "type" : "geo_point", "doc_values" : true },
+            "latitude" : { "type" : "float", "doc_values" : true },
+            "longitude" : { "type" : "float", "doc_values" : true }
+          }
+        }
+      }
     }
   }
 }
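
The template changes above disable fielddata for analyzed string fields and enable doc_values on the raw multi-field, the numeric, date, and geo_point dynamic mappings, and the new @timestamp and geoip properties, keeping sorts and aggregations on those fields off the Java heap. To check which template a cluster actually holds (assuming the default `template_name` of `logstash`), something like:

    curl -XGET 'http://localhost:9200/_template/logstash?pretty'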