fluent-plugin-elasticsearch-sm 1.4.1

data/Rakefile ADDED
@@ -0,0 +1,10 @@
+ require 'bundler/gem_tasks'
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'test'
+   test.pattern = 'test/**/test_*.rb'
+   test.verbose = true
+ end
+
+ task :default => :test
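Note: the test task above picks up any file matching test/**/test_*.rb, and `task :default => :test` makes a plain `rake` invocation run it. As an illustration only (no test files appear in this diff, and the file name below is hypothetical), a minimal test-unit file that the pattern would match could look like:

# hypothetical file: test/plugin/test_example.rb (not part of the gem)
require 'test/unit'

class TestExample < Test::Unit::TestCase
  def test_truth
    assert true
  end
end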
data/lib/fluent/plugin/out_elasticsearch.rb ADDED
@@ -0,0 +1,249 @@
+ # encoding: UTF-8
+ require 'date'
+ require 'excon'
+ require 'elasticsearch'
+ require 'uri'
+ begin
+   require 'strptime'
+ rescue LoadError
+ end
+
+ class Fluent::ElasticsearchOutput < Fluent::BufferedOutput
+   class ConnectionFailure < StandardError; end
+
+   Fluent::Plugin.register_output('elasticsearch', self)
+
+   config_param :host, :string, :default => 'localhost'
+   config_param :port, :integer, :default => 9200
+   config_param :user, :string, :default => nil
+   config_param :password, :string, :default => nil, :secret => true
+   config_param :path, :string, :default => nil
+   config_param :scheme, :string, :default => 'http'
+   config_param :hosts, :string, :default => nil
+   config_param :target_index_key, :string, :default => nil
+   config_param :time_key_format, :string, :default => nil
+   config_param :logstash_format, :bool, :default => false
+   config_param :logstash_prefix, :string, :default => "logstash"
+   config_param :logstash_dateformat, :string, :default => "%Y.%m.%d"
+   config_param :utc_index, :bool, :default => true
+   config_param :type_name, :string, :default => "fluentd"
+   config_param :index_name, :string, :default => "fluentd"
+   config_param :id_key, :string, :default => nil
+   config_param :write_operation, :string, :default => "index"
+   config_param :parent_key, :string, :default => nil
+   config_param :routing_key, :string, :default => nil
+   config_param :request_timeout, :time, :default => 5
+   config_param :reload_connections, :bool, :default => true
+   config_param :reload_on_failure, :bool, :default => false
+   config_param :resurrect_after, :time, :default => 60
+   config_param :time_key, :string, :default => nil
+   config_param :time_key_exclude_timestamp, :bool, :default => false
+   config_param :ssl_verify, :bool, :default => true
+   config_param :client_key, :string, :default => nil
+   config_param :client_cert, :string, :default => nil
+   config_param :client_key_pass, :string, :default => nil
+   config_param :ca_file, :string, :default => nil
+
+   include Fluent::SetTagKeyMixin
+   config_set_default :include_tag_key, false
+
+   def initialize
+     super
+     @time_parser = TimeParser.new(@time_key_format, @router)
+   end
+
+   def configure(conf)
+     super
+     @time_parser = TimeParser.new(@time_key_format, @router)
+   end
+
+   def start
+     super
+   end
+
+   # once fluent v0.14 is released we might be able to use
+   # Fluent::Parser::TimeParser, but it doesn't quite do what we want - it gives
+   # [sec,nsec] whereas we want something we can call `strftime` on...
+   class TimeParser
+     def initialize(time_key_format, router)
+       @time_key_format = time_key_format
+       @router = router
+       @parser = if time_key_format
+         begin
+           # Strptime doesn't support all formats, but for those it does it's
+           # blazingly fast.
+           strptime = Strptime.new(time_key_format)
+           Proc.new { |value| strptime.exec(value).to_datetime }
+         rescue
+           # Can happen if Strptime doesn't recognize the format; or
+           # if strptime couldn't be required (because it's not installed -- it's
+           # ruby 2 only)
+           Proc.new { |value| DateTime.strptime(value, time_key_format) }
+         end
+       else
+         Proc.new { |value| DateTime.parse(value) }
+       end
+     end
+
+     def parse(value, event_time)
+       @parser.call(value)
+     rescue => e
+       @router.emit_error_event("Fluent::ElasticsearchOutput::TimeParser.error", Fluent::Engine.now, {'time' => event_time, 'format' => @time_key_format, 'value' => value}, e)
+       return Time.at(event_time).to_datetime
+     end
+   end
+
+   def client
+     @_es ||= begin
+       excon_options = { client_key: @client_key, client_cert: @client_cert, client_key_pass: @client_key_pass }
+       adapter_conf = lambda {|f| f.adapter :excon, excon_options }
+       transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(get_connection_options.merge(
+         options: {
+           reload_connections: @reload_connections,
+           reload_on_failure: @reload_on_failure,
+           resurrect_after: @resurrect_after,
+           retry_on_failure: 5,
+           transport_options: {
+             request: { timeout: @request_timeout },
+             ssl: { verify: @ssl_verify, ca_file: @ca_file }
+           }
+         }), &adapter_conf)
+       es = Elasticsearch::Client.new transport: transport
+
+       begin
+         raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})!" unless es.ping
+       rescue *es.transport.host_unreachable_exceptions => e
+         raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description})! #{e.message}"
+       end
+
+       log.info "Connection opened to Elasticsearch cluster => #{connection_options_description}"
+       es
+     end
+   end
+
+   def get_connection_options
+     raise "`password` must be present if `user` is present" if @user && !@password
+
+     hosts = if @hosts
+       @hosts.split(',').map do |host_str|
+         # Support legacy hosts format host:port,host:port,host:port...
+         if host_str.match(%r{^[^:]+(\:\d+)?$})
+           {
+             host: host_str.split(':')[0],
+             port: (host_str.split(':')[1] || @port).to_i,
+             scheme: @scheme
+           }
+         else
+           # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+           uri = URI(host_str)
+           %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+             hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+             hash
+           end
+         end
+       end.compact
+     else
+       [{host: @host, port: @port, scheme: @scheme}]
+     end.each do |host|
+       host.merge!(user: @user, password: @password) if !host[:user] && @user
+       host.merge!(path: @path) if !host[:path] && @path
+     end
+
+     {
+       hosts: hosts
+     }
+   end
+
+   def connection_options_description
+     get_connection_options[:hosts].map do |host_info|
+       attributes = host_info.dup
+       attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+       attributes.inspect
+     end.join(', ')
+   end
+
+   def format(tag, time, record)
+     [tag, time, record].to_msgpack
+   end
+
+   def shutdown
+     super
+   end
+
+   def append_record_to_messages(op, meta, record, msgs)
+     case op
+     when "update", "upsert"
+       if meta.has_key?("_id")
+         msgs << { "update" => meta }
+         msgs << { "doc" => record, "doc_as_upsert" => op == "upsert" }
+       end
+     when "create"
+       if meta.has_key?("_id")
+         msgs << { "create" => meta }
+         msgs << record
+       end
+     when "index"
+       msgs << { "index" => meta }
+       msgs << record
+     end
+   end
+
+   def write(chunk)
+     bulk_message = []
+
+     chunk.msgpack_each do |tag, time, record|
+       next unless record.is_a? Hash
+       if @target_index_key && record[@target_index_key]
+         target_index = record.delete @target_index_key
+       elsif @logstash_format
+         if record.has_key?("@timestamp")
+           dt = record["@timestamp"]
+           dt = @time_parser.parse(record["@timestamp"], time)
+         elsif record.has_key?(@time_key)
+           dt = @time_parser.parse(record[@time_key], time)
+           record['@timestamp'] = record[@time_key] unless time_key_exclude_timestamp
+         else
+           dt = Time.at(time).to_datetime
+           record.merge!({"@timestamp" => dt.to_s})
+         end
+         dt = dt.new_offset(0) if @utc_index
+         target_index = "#{@logstash_prefix}-#{dt.strftime(@logstash_dateformat)}"
+       else
+         target_index = @index_name
+       end
+
+       if @include_tag_key
+         record.merge!(@tag_key => tag)
+       end
+
+       meta = {"_index" => target_index, "_type" => type_name}
+
+       @meta_config_map ||= { 'id_key' => '_id', 'parent_key' => '_parent', 'routing_key' => '_routing' }
+       @meta_config_map.each_pair do |config_name, meta_key|
+         record_key = self.instance_variable_get("@#{config_name}")
+         meta[meta_key] = record[record_key] if record_key && record[record_key]
+       end
+
+       append_record_to_messages(@write_operation, meta, record, bulk_message)
+     end
+
+     send(bulk_message) unless bulk_message.empty?
+     bulk_message.clear
+   end
+
+   def send(data)
+     retries = 0
+     begin
+       client.bulk body: data
+     rescue *client.transport.host_unreachable_exceptions => e
+       if retries < 2
+         retries += 1
+         @_es = nil
+         log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
+         sleep 2**retries
+         retry
+       end
+       raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
+     end
+   end
+ end
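Note: get_connection_options above accepts the `hosts` parameter in two notations, legacy comma-separated host:port pairs and full URLs. A minimal standalone sketch of that parsing (illustrative only, not part of the gem; it assumes the plugin defaults of port 9200 and the http scheme, and the `parse_hosts` name is made up here):

require 'uri'

DEFAULT_PORT   = 9200
DEFAULT_SCHEME = 'http'

# Parse a comma-separated hosts string the same two ways the plugin does.
def parse_hosts(hosts)
  hosts.split(',').map do |host_str|
    if host_str.match(%r{^[^:]+(\:\d+)?$})
      # legacy format: host:port,host:port,...
      host, port = host_str.split(':')
      { host: host, port: (port || DEFAULT_PORT).to_i, scheme: DEFAULT_SCHEME }
    else
      # URL format: http://logs.foo.com or https://john:pass@logs2.foo.com/elastic
      uri = URI(host_str)
      { host: uri.host, port: uri.port, scheme: uri.scheme, user: uri.user,
        password: uri.password, path: uri.path }.reject { |_, v| v.nil? || v == '' }
    end
  end
end

p parse_hosts('es1:9201,es2')
# legacy pairs: es1 keeps port 9201, es2 falls back to 9200/http
p parse_hosts('https://john:pass@logs2.foo.com/elastic')
# URL form: scheme https, port 443, plus user/password/path taken from the URL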
data/lib/fluent/plugin/out_elasticsearch_dynamic.rb ADDED
@@ -0,0 +1,239 @@
+ # encoding: UTF-8
+ require_relative 'out_elasticsearch'
+
+ class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput
+
+   Fluent::Plugin.register_output('elasticsearch_dynamic', self)
+
+   config_param :delimiter, :string, :default => "."
+
+   # params overloaded as strings
+   config_param :port, :string, :default => "9200"
+   config_param :logstash_format, :string, :default => "false"
+   config_param :utc_index, :string, :default => "true"
+   config_param :time_key_exclude_timestamp, :bool, :default => false
+   config_param :reload_connections, :string, :default => "true"
+   config_param :reload_on_failure, :string, :default => "false"
+   config_param :resurrect_after, :string, :default => "60"
+   config_param :ssl_verify, :string, :default => "true"
+
+   def configure(conf)
+     super
+
+     # evaluate all configurations here
+     @dynamic_params = self.instance_variables.select { |var| is_valid_expand_param_type(var) }
+     @dynamic_config = Hash.new
+     @dynamic_params.each { |var|
+       value = expand_param(self.instance_variable_get(var), nil, nil, nil)
+       var = var[1..-1]
+       @dynamic_config[var] = value
+     }
+     # end eval all configs
+     @current_config = nil
+   end
+
+   def client(host)
+
+     # check here to see if we already have a client connection for the given host
+     connection_options = get_connection_options(host)
+
+     @_es = nil unless is_existing_connection(connection_options[:hosts])
+
+     @_es ||= begin
+       @current_config = connection_options[:hosts].clone
+       excon_options = { client_key: @dynamic_config['client_key'], client_cert: @dynamic_config['client_cert'], client_key_pass: @dynamic_config['client_key_pass'] }
+       adapter_conf = lambda {|f| f.adapter :excon, excon_options }
+       transport = Elasticsearch::Transport::Transport::HTTP::Faraday.new(connection_options.merge(
+         options: {
+           reload_connections: @dynamic_config['reload_connections'],
+           reload_on_failure: @dynamic_config['reload_on_failure'],
+           resurrect_after: @dynamic_config['resurrect_after'].to_i,
+           retry_on_failure: 5,
+           transport_options: {
+             request: { timeout: @dynamic_config['request_timeout'] },
+             ssl: { verify: @dynamic_config['ssl_verify'], ca_file: @dynamic_config['ca_file'] }
+           }
+         }), &adapter_conf)
+       es = Elasticsearch::Client.new transport: transport
+
+       begin
+         raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description(host)})!" unless es.ping
+       rescue *es.transport.host_unreachable_exceptions => e
+         raise ConnectionFailure, "Can not reach Elasticsearch cluster (#{connection_options_description(host)})! #{e.message}"
+       end
+
+       log.info "Connection opened to Elasticsearch cluster => #{connection_options_description(host)}"
+       es
+     end
+   end
+
+   def get_connection_options(con_host)
+     raise "`password` must be present if `user` is present" if @dynamic_config['user'] && !@dynamic_config['password']
+
+     hosts = if con_host || @dynamic_config['hosts']
+       (con_host || @dynamic_config['hosts']).split(',').map do |host_str|
+         # Support legacy hosts format host:port,host:port,host:port...
+         if host_str.match(%r{^[^:]+(\:\d+)?$})
+           {
+             host: host_str.split(':')[0],
+             port: (host_str.split(':')[1] || @dynamic_config['port']).to_i,
+             scheme: @dynamic_config['scheme']
+           }
+         else
+           # New hosts format expects URLs such as http://logs.foo.com,https://john:pass@logs2.foo.com/elastic
+           uri = URI(host_str)
+           %w(user password path).inject(host: uri.host, port: uri.port, scheme: uri.scheme) do |hash, key|
+             hash[key.to_sym] = uri.public_send(key) unless uri.public_send(key).nil? || uri.public_send(key) == ''
+             hash
+           end
+         end
+       end.compact
+     else
+       [{host: @dynamic_config['host'], port: @dynamic_config['port'].to_i, scheme: @dynamic_config['scheme']}]
+     end.each do |host|
+       host.merge!(user: @dynamic_config['user'], password: @dynamic_config['password']) if !host[:user] && @dynamic_config['user']
+       host.merge!(path: @dynamic_config['path']) if !host[:path] && @dynamic_config['path']
+     end
+
+     {
+       hosts: hosts
+     }
+   end
+
+   def connection_options_description(host)
+     get_connection_options(host)[:hosts].map do |host_info|
+       attributes = host_info.dup
+       attributes[:password] = 'obfuscated' if attributes.has_key?(:password)
+       attributes.inspect
+     end.join(', ')
+   end
+
+   def write(chunk)
+     bulk_message = Hash.new { |h,k| h[k] = [] }
+     dynamic_conf = @dynamic_config.clone
+
+     chunk.msgpack_each do |tag, time, record|
+       next unless record.is_a? Hash
+
+       # evaluate all configurations here
+       @dynamic_params.each { |var|
+         k = var[1..-1]
+         v = self.instance_variable_get(var)
+         # check here to determine if we should evaluate
+         if dynamic_conf[k] != v
+           value = expand_param(v, tag, time, record)
+           dynamic_conf[k] = value
+         end
+       }
+       # end eval all configs
+
+       if eval(dynamic_conf['logstash_format'])
+         if record.has_key?("@timestamp")
+           time = Time.parse record["@timestamp"]
+         elsif record.has_key?(dynamic_conf['time_key'])
+           time = Time.parse record[dynamic_conf['time_key']]
+           record['@timestamp'] = record[dynamic_conf['time_key']] unless time_key_exclude_timestamp
+         else
+           record.merge!({"@timestamp" => Time.at(time).to_datetime.to_s})
+         end
+
+         if eval(dynamic_conf['utc_index'])
+           target_index = "#{dynamic_conf['logstash_prefix']}-#{Time.at(time).getutc.strftime("#{dynamic_conf['logstash_dateformat']}")}"
+         else
+           target_index = "#{dynamic_conf['logstash_prefix']}-#{Time.at(time).strftime("#{dynamic_conf['logstash_dateformat']}")}"
+         end
+       else
+         target_index = dynamic_conf['index_name']
+       end
+
+       if @include_tag_key
+         record.merge!(dynamic_conf['tag_key'] => tag)
+       end
+
+       meta = {"_index" => target_index, "_type" => dynamic_conf['type_name']}
+
+       @meta_config_map ||= { 'id_key' => '_id', 'parent_key' => '_parent', 'routing_key' => '_routing' }
+       @meta_config_map.each_pair do |config_name, meta_key|
+         if dynamic_conf[config_name] && record[dynamic_conf[config_name]]
+           meta[meta_key] = record[dynamic_conf[config_name]]
+         end
+       end
+
+       if dynamic_conf['hosts']
+         host = dynamic_conf['hosts']
+       else
+         host = "#{dynamic_conf['host']}:#{dynamic_conf['port']}"
+       end
+
+       append_record_to_messages(dynamic_conf["write_operation"], meta, record, bulk_message[host])
+     end
+
+     bulk_message.each do |hKey, array|
+       send(array, hKey) unless array.empty?
+       array.clear
+     end
+   end
+
+   def send(data, host)
+     retries = 0
+     begin
+       client(host).bulk body: data
+     rescue *client(host).transport.host_unreachable_exceptions => e
+       if retries < 2
+         retries += 1
+         @_es = nil
+         log.warn "Could not push logs to Elasticsearch, resetting connection and trying again. #{e.message}"
+         sleep 2**retries
+         retry
+       end
+       raise ConnectionFailure, "Could not push logs to Elasticsearch after #{retries} retries. #{e.message}"
+     end
+   end
+
+   def expand_param(param, tag, time, record)
+     # check for '${ ... }'
+     # yes => `eval`
+     # no => return param
+     return param if (param =~ /\${.+}/).nil?
+
+     # check for 'tag_parts[]'
+     # separated by a delimiter (default '.')
+     tag_parts = tag.split(@delimiter) unless (param =~ /tag_parts\[.+\]/).nil? || tag.nil?
+
+     # pull out section between ${} then eval
+     inner = param.clone
+     while inner.match(/\${.+}/)
+       to_eval = inner.match(/\${(.+?)}/){$1}
+
+       if !(to_eval =~ /record\[.+\]/).nil? && record.nil?
+         return to_eval
+       elsif !(to_eval =~ /tag_parts\[.+\]/).nil? && tag_parts.nil?
+         return to_eval
+       elsif !(to_eval =~ /time/).nil? && time.nil?
+         return to_eval
+       else
+         inner.sub!(/\${.+?}/, eval(to_eval))
+       end
+     end
+     inner
+   end
+
+   def is_valid_expand_param_type(param)
+     return false if [:@buffer_type].include?(param)
+     return self.instance_variable_get(param).is_a?(String)
+   end
+
+   def is_existing_connection(host)
+     # check if the host provided matches the current connection
+     return false if @_es.nil?
+     return false if host.length != @current_config.length
+
+     for i in 0...host.length
+       if !host[i][:host].eql?(@current_config[i][:host]) || host[i][:port] != @current_config[i][:port]
+         return false
+       end
+     end
+
+     return true
+   end
+ end
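Note: the ${...} placeholders handled by expand_param above are eval'd against record, tag_parts (the tag split on `delimiter`) and time; when one of those is unavailable the method returns the raw expression instead. A simplified standalone sketch of the expansion (illustrative only; the `expand` helper and the sample tag/record below are made up for this example):

# Simplified stand-in for expand_param: each ${...} is eval'd against the
# local record / tag_parts / time variables, mirroring the plugin's approach.
def expand(param, tag, time, record, delimiter = '.')
  return param unless param =~ /\${.+}/
  tag_parts = tag.split(delimiter) unless tag.nil?
  result = param.dup
  result.sub!(/\${(.+?)}/) { eval($1) } while result =~ /\${.+}/
  result
end

record = { 'level' => 'error' }
puts expand("logs-${tag_parts[0]}-${record['level']}", 'app.web', Time.now.to_i, record)
# prints "logs-app-error"

In elasticsearch_dynamic this is what lets settings such as index_name or logstash_prefix vary per record, e.g. index_name fluentd.${tag_parts[0]}.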