fluent-plugin-elasticsearch-sm 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require 'bundler/gem_tasks'
require 'rake/testtask'

# Run the unit test suite: every test/**/test_*.rb file, with the
# test/ directory on the load path and verbose reporting enabled.
Rake::TestTask.new(:test) do |t|
  t.libs << 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# `rake` with no arguments runs the tests.
task default: :test
@@ -0,0 +1,249 @@
1
+ # encoding: UTF-8
2
+ require 'date'
3
+ require 'excon'
4
+ require 'elasticsearch'
5
+ require 'uri'
6
+ begin
7
+ require 'strptime'
8
+ rescue LoadError
9
+ end
10
+
11
# Fluentd buffered output plugin that ships events to an Elasticsearch
# cluster through the bulk API. Supports logstash-style time-based index
# names, multiple hosts (legacy `host:port,...` and URL formats), basic
# auth, TLS options, and index/create/update/upsert write operations.
class Fluent::ElasticsearchOutput < Fluent::BufferedOutput
  # Raised when the cluster cannot be reached, either on the initial ping
  # or after exhausting bulk-request retries.
  class ConnectionFailure < StandardError; end

  Fluent::Plugin.register_output('elasticsearch', self)

  config_param :host, :string, :default => 'localhost'
  config_param :port, :integer, :default => 9200
  config_param :user, :string, :default => nil
  config_param :password, :string, :default => nil, :secret => true
  config_param :path, :string, :default => nil
  config_param :scheme, :string, :default => 'http'
  config_param :hosts, :string, :default => nil
  config_param :target_index_key, :string, :default => nil
  config_param :time_key_format, :string, :default => nil
  config_param :logstash_format, :bool, :default => false
  config_param :logstash_prefix, :string, :default => "logstash"
  config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
  config_param :utc_index, :bool, :default => true
  config_param :type_name, :string, :default => "fluentd"
  config_param :index_name, :string, :default => "fluentd"
  config_param :id_key, :string, :default => nil
  config_param :write_operation, :string, :default => "index"
  config_param :parent_key, :string, :default => nil
  config_param :routing_key, :string, :default => nil
  config_param :request_timeout, :time, :default => 5
  config_param :reload_connections, :bool, :default => true
  config_param :reload_on_failure, :bool, :default => false
  config_param :resurrect_after, :time, :default => 60
  config_param :time_key, :string, :default => nil
  config_param :time_key_exclude_timestamp, :bool, :default => false
  config_param :ssl_verify, :bool, :default => true
  config_param :client_key, :string, :default => nil
  config_param :client_cert, :string, :default => nil
  config_param :client_key_pass, :string, :default => nil
  config_param :ca_file, :string, :default => nil

  include Fluent::SetTagKeyMixin
  config_set_default :include_tag_key, false

  def initialize
    super
    # Built here with default values so the parser always exists; rebuilt
    # in #configure once the real `time_key_format` is known.
    @time_parser = TimeParser.new(@time_key_format, @router)
  end

  def configure(conf)
    super
    # Re-create the parser now that config_param values have been applied.
    @time_parser = TimeParser.new(@time_key_format, @router)
  end

  def start
    super
  end

  # once fluent v0.14 is released we might be able to use
  # Fluent::Parser::TimeParser, but it doesn't quite do what we want - if gives
  # [sec,nsec] where as we want something we can call `strftime` on...
  class TimeParser
    # @param time_key_format [String, nil] strftime-style format, or nil to
    #   fall back to DateTime.parse heuristics.
    # @param router [Object] fluentd event router used to emit parse errors.
    def initialize(time_key_format, router)
      @time_key_format = time_key_format
      @router = router
      @parser = if time_key_format
        begin
          # Strptime doesn't support all formats, but for those it does it's
          # blazingly fast.
          strptime = Strptime.new(time_key_format)
          Proc.new { |value| strptime.exec(value).to_datetime }
        rescue
          # Can happen if Strptime doesn't recognize the format; or
          # if strptime couldn't be required (because it's not installed -- it's
          # ruby 2 only)
          Proc.new { |value| DateTime.strptime(value, time_key_format) }
        end
      else
        Proc.new { |value| DateTime.parse(value) }
      end
    end

    # Parse `value` into a DateTime. On failure, emits an error event via the
    # router and falls back to the record's event time.
    def parse(value, event_time)
      @parser.call(value)
    rescue => e
      @router.emit_error_event("Fluent::ElasticsearchOutput::TimeParser.error", Fluent::Engine.now, {'time' => event_time, 'format' => @time_key_format, 'value' => value }, e)
      return Time.at(event_time).to_datetime
    end
  end

  # Lazily builds (and memoizes) the Elasticsearch client, verifying
  # connectivity with a ping before handing it out.
  # @raise [ConnectionFailure] if the cluster cannot be reached.
  def client
    @_es ||= begin
      excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
      adapter_conf = lambda {|f| f.adapter :excon, excon_options }
      transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
        options: {
          reload_connections: @reload_connections,
          reload_on_failure: @reload_on_failure,
          resurrect_after: @resurrect_after,
          retry_on_failure: 5,
          transport_options: {
            request: { timeout: @request_timeout },
            ssl: { verify: @ssl_verify, ca_file: @ca_file }
          }
        }), &adapter_conf)
      es = Elasticsearch::Client.new transport: transport

      begin
        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
      rescue *es.transport.host_unreachable_exceptions => e
        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
      end

      log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
      es
    end
  end

  # Builds the `hosts:` option for the Elasticsearch client from either the
  # `hosts` list or the single `host`/`port`/`scheme` params, folding in
  # user/password/path defaults for entries that don't specify their own.
  def get_connection_options
    raise "`password` must be present if `user` is present" if @user && !@password

    hosts = if @hosts
      @hosts.split(',').map do |host_str|
        # Support legacy hosts format host:port,host:port,host:port...
        if host_str.match(%r{^[^:]+(\:\d+)?$})
          {
            host: host_str.split(':')[0],
            port: (host_str.split(':')[1] || @port).to_i,
            scheme: @scheme
          }
        else
          # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
          uri = URI(host_str)
          %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
            hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
            hash
          end
        end
      end.compact
    else
      [{host: @host, port: @port, scheme: @scheme}]
    end.each do |host|
      host.merge!(user: @user, password: @password) if !host[:user] && @user
      host.merge!(path: @path) if !host[:path] && @path
    end

    {
      hosts: hosts
    }
  end

  # Human-readable host list for log/error messages; passwords are masked.
  def connection_options_description
    get_connection_options[:hosts].map do |host_info|
      attributes = host_info.dup
      attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
      attributes.inspect
    end.join(', ')
  end

  # BufferedOutput hook: serialize each event as msgpack for the buffer chunk.
  def format(tag, time, record)
    [tag, time, record].to_msgpack
  end

  def shutdown
    super
  end

  # Appends the bulk-API action line(s) for one record to `msgs`.
  # update/upsert/create require an "_id" in `meta`; records without one are
  # silently skipped for those operations.
  def append_record_to_messages(op, meta, record, msgs)
    case op
    when "update", "upsert"
      if meta.has_key?("_id")
        msgs << { "update" => meta }
        msgs << { "doc" => record, "doc_as_upsert" => op == "upsert" }
      end
    when "create"
      if meta.has_key?("_id")
        msgs << { "create" => meta }
        msgs << record
      end
    when "index"
      msgs << { "index" => meta }
      msgs << record
    end
  end

  # BufferedOutput hook: turn a chunk of events into one bulk request.
  # Index selection precedence: record[target_index_key] > logstash-style
  # time-based index > static index_name.
  def write(chunk)
    bulk_message = []

    chunk.msgpack_each do |tag, time, record|
      next unless record.is_a? Hash
      if @target_index_key && record[@target_index_key]
        target_index = record.delete @target_index_key
      elsif @logstash_format
        if record.has_key?("@timestamp")
          dt = @time_parser.parse(record["@timestamp"], time)
        elsif record.has_key?(@time_key)
          dt = @time_parser.parse(record[@time_key], time)
          record['@timestamp'] = record[@time_key] unless time_key_exclude_timestamp
        else
          dt = Time.at(time).to_datetime
          record.merge!({"@timestamp" => dt.to_s})
        end
        dt = dt.new_offset(0) if @utc_index
        target_index = "#{@logstash_prefix}-#{dt.strftime(@logstash_dateformat)}"
      else
        target_index = @index_name
      end

      if @include_tag_key
        record.merge!(@tag_key => tag)
      end

      meta = {"_index" => target_index, "_type" => type_name}

      @meta_config_map ||= { 'id_key' => '_id', 'parent_key' => '_parent', 'routing_key' => '_routing' }
      @meta_config_map.each_pair do |config_name, meta_key|
        record_key = self.instance_variable_get("@#{config_name}")
        meta[meta_key] = record[record_key] if record_key && record[record_key]
      end

      append_record_to_messages(@write_operation, meta, record, bulk_message)
    end

    send(bulk_message) unless bulk_message.empty?
    bulk_message.clear
  end

  # Issues the bulk request, resetting the cached client and retrying (with
  # exponential backoff) up to 2 times on host-unreachable errors.
  # NOTE(review): this shadows Object#send; kept for interface compatibility.
  # @raise [ConnectionFailure] once retries are exhausted.
  def send(data)
    retries = 0
    begin
      client.bulk body: data
    rescue *client.transport.host_unreachable_exceptions => e
      if retries < 2
        retries += 1
        @_es = nil
        log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
        sleep 2**retries
        retry
      end
      raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
    end
  end
end
@@ -0,0 +1,239 @@
1
+ # encoding: UTF-8
2
+ require_relative 'out_elasticsearch'
3
+
4
# Variant of ElasticsearchOutput where most configuration values may contain
# `${...}` Ruby expressions that are re-evaluated per record (with `tag`,
# `time`, `record`, and `tag_parts` in scope), allowing per-event routing to
# different hosts/indices.
class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput

  Fluent::Plugin.register_output('elasticsearch_dynamic', self)

  config_param :delimiter, :string, :default => "."

  # params overloaded as strings so they can carry `${...}` expressions;
  # they are coerced back (to_i / eval) after expansion.
  config_param :port, :string, :default => "9200"
  config_param :logstash_format, :string, :default => "false"
  config_param :utc_index, :string, :default => "true"
  config_param :time_key_exclude_timestamp, :bool, :default => false
  config_param :reload_connections, :string, :default => "true"
  config_param :reload_on_failure, :string, :default => "false"
  config_param :resurrect_after, :string, :default => "60"
  # FIX: was `:dfeault => "true"` — the typo meant no default was applied
  # and @ssl_verify was nil.
  config_param :ssl_verify, :string, :default => "true"

  def configure(conf)
    super

    # evaluate all configurations here
    @dynamic_params = self.instance_variables.select { |var| is_valid_expand_param_type(var) }
    @dynamic_config = Hash.new
    @dynamic_params.each { |var|
      value = expand_param(self.instance_variable_get(var), nil, nil, nil)
      var = var[1..-1]
      @dynamic_config[var] = value
    }
    # end eval all configs
    @current_config = nil
  end

  # Returns a client connected to `host`, reusing the cached connection when
  # the resolved host list matches the one currently connected.
  # @raise [ConnectionFailure] if the cluster cannot be reached.
  def client(host)

    # check here to see if we already have a client connection for the given host
    connection_options = get_connection_options(host)

    @_es = nil unless is_existing_connection(connection_options[:hosts])

    @_es ||= begin
      @current_config = connection_options[:hosts].clone
      excon_options = { client_key: @dynamic_config['client_key'], client_cert: @dynamic_config['client_cert'], client_key_pass: @dynamic_config['client_key_pass'] }
      adapter_conf = lambda {|f| f.adapter :excon, excon_options }
      transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(connection_options.merge(
        options: {
          reload_connections: @dynamic_config['reload_connections'],
          reload_on_failure: @dynamic_config['reload_on_failure'],
          resurrect_after: @dynamic_config['resurrect_after'].to_i,
          retry_on_failure: 5,
          transport_options: {
            request: { timeout: @dynamic_config['request_timeout'] },
            ssl: { verify: @dynamic_config['ssl_verify'], ca_file: @dynamic_config['ca_file'] }
          }
        }), &adapter_conf)
      es = Elasticsearch::Client.new transport: transport

      begin
        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description(host)})!" unless es.ping
      rescue *es.transport.host_unreachable_exceptions => e
        raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description(host)})! #{e.message}"
      end

      log.info "Connection opened to Elasticsearch cluster => #{connection_options_description(host)}"
      es
    end
  end

  # Builds the `hosts:` client option from `con_host` (per-record override)
  # or the expanded `hosts` config, falling back to host/port/scheme.
  def get_connection_options(con_host)
    raise "`password` must be present if `user` is present" if @dynamic_config['user'] && !@dynamic_config['password']

    hosts = if con_host || @dynamic_config['hosts']
      (con_host || @dynamic_config['hosts']).split(',').map do |host_str|
        # Support legacy hosts format host:port,host:port,host:port...
        if host_str.match(%r{^[^:]+(\:\d+)?$})
          {
            host: host_str.split(':')[0],
            port: (host_str.split(':')[1] || @dynamic_config['port']).to_i,
            scheme: @dynamic_config['scheme']
          }
        else
          # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
          uri = URI(host_str)
          %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
            hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
            hash
          end
        end
      end.compact
    else
      [{host: @dynamic_config['host'], port: @dynamic_config['port'].to_i, scheme: @dynamic_config['scheme']}]
    end.each do |host|
      host.merge!(user: @dynamic_config['user'], password: @dynamic_config['password']) if !host[:user] && @dynamic_config['user']
      host.merge!(path: @dynamic_config['path']) if !host[:path] && @dynamic_config['path']
    end

    {
      hosts: hosts
    }
  end

  # Human-readable host list for log/error messages; passwords are masked.
  def connection_options_description(host)
    get_connection_options(host)[:hosts].map do |host_info|
      attributes = host_info.dup
      attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
      attributes.inspect
    end.join(', ')
  end

  # BufferedOutput hook: expands dynamic params per record, groups bulk
  # messages by destination host, then flushes one bulk request per host.
  def write(chunk)
    bulk_message = Hash.new { |h,k| h[k] = [] }
    dynamic_conf = @dynamic_config.clone

    chunk.msgpack_each do |tag, time, record|
      next unless record.is_a? Hash

      # evaluate all configurations here
      @dynamic_params.each { |var|
        k = var[1..-1]
        v = self.instance_variable_get(var)
        # check here to determine if we should evaluate
        if dynamic_conf[k] != v
          value = expand_param(v, tag, time, record)
          dynamic_conf[k] = value
        end
      }
      # end eval all configs

      if eval(dynamic_conf['logstash_format'])
        if record.has_key?("@timestamp")
          time = Time.parse record["@timestamp"]
        elsif record.has_key?(dynamic_conf['time_key'])
          time = Time.parse record[dynamic_conf['time_key']]
          record['@timestamp'] = record[dynamic_conf['time_key']] unless time_key_exclude_timestamp
        else
          record.merge!({"@timestamp" => Time.at(time).to_datetime.to_s})
        end

        if eval(dynamic_conf['utc_index'])
          target_index = "#{dynamic_conf['logstash_prefix']}-#{Time.at(time).getutc.strftime("#{dynamic_conf['logstash_dateformat']}")}"
        else
          target_index = "#{dynamic_conf['logstash_prefix']}-#{Time.at(time).strftime("#{dynamic_conf['logstash_dateformat']}")}"
        end
      else
        target_index = dynamic_conf['index_name']
      end

      if @include_tag_key
        record.merge!(dynamic_conf['tag_key'] => tag)
      end

      meta = {"_index" => target_index, "_type" => dynamic_conf['type_name']}

      @meta_config_map ||= { 'id_key' => '_id', 'parent_key' => '_parent', 'routing_key' => '_routing' }
      @meta_config_map.each_pair do |config_name, meta_key|
        if dynamic_conf[config_name] && record[dynamic_conf[config_name]]
          meta[meta_key] = record[dynamic_conf[config_name]]
        end
      end

      if dynamic_conf['hosts']
        host = dynamic_conf['hosts']
      else
        host = "#{dynamic_conf['host']}:#{dynamic_conf['port']}"
      end

      append_record_to_messages(dynamic_conf["write_operation"], meta, record, bulk_message[host])
    end

    bulk_message.each do | hKey, array |
      send(array, hKey) unless array.empty?
      array.clear
    end
  end

  # Issues a bulk request to a specific host, resetting the cached client and
  # retrying (with exponential backoff) up to 2 times on unreachable errors.
  # NOTE(review): this shadows Object#send; kept for interface compatibility.
  # @raise [ConnectionFailure] once retries are exhausted.
  def send(data, host)
    retries = 0
    begin
      client(host).bulk body: data
    rescue *client(host).transport.host_unreachable_exceptions => e
      if retries < 2
        retries += 1
        @_es = nil
        log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
        sleep 2**retries
        retry
      end
      raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
    end
  end

  # Expands `${...}` placeholders in `param` by eval'ing their contents with
  # `tag`, `time`, `record`, and `tag_parts` in scope. Returns the raw inner
  # expression when the required context (record/tag_parts/time) is nil.
  # SECURITY NOTE: this uses `eval` on configuration text by design; config
  # files must be trusted input.
  def expand_param(param, tag, time, record)
    # check for '${ ... }'
    # yes => `eval`
    # no => return param
    return param if (param =~ /\${.+}/).nil?

    # check for 'tag_parts[]'
    # separated by a delimiter (default '.')
    tag_parts = tag.split(@delimiter) unless (param =~ /tag_parts\[.+\]/).nil? || tag.nil?

    # pull out section between ${} then eval
    inner = param.clone
    while inner.match(/\${.+}/)
      to_eval = inner.match(/\${(.+?)}/){$1}

      if !(to_eval =~ /record\[.+\]/).nil? && record.nil?
        return to_eval
      elsif !(to_eval =~/tag_parts\[.+\]/).nil? && tag_parts.nil?
        return to_eval
      elsif !(to_eval =~/time/).nil? && time.nil?
        return to_eval
      else
        inner.sub!(/\${.+?}/, eval( to_eval ))
      end
    end
    inner
  end

  # Only String-valued instance variables (except the buffer type) are
  # eligible for `${...}` expansion.
  def is_valid_expand_param_type(param)
    return false if [:@buffer_type].include?(param)
    return self.instance_variable_get(param).is_a?(String)
  end

  # True when `host` (resolved hosts array) matches the host list of the
  # currently cached connection, element by element.
  def is_existing_connection(host)
    # check if the host provided match the current connection
    return false if @_es.nil?
    return false if host.length != @current_config.length

    # FIX: the original `!a.eql? b || c` parsed as `!a.eql?(b || c)` due to
    # Ruby's parenless-call precedence, silently dropping the port check.
    host.each_index do |i|
      unless host[i][:host].eql?(@current_config[i][:host]) &&
             host[i][:port] == @current_config[i][:port]
        return false
      end
    end

    return true
  end
end