fluent-plugin-splunk-hec 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
# Rake entry point: pulls in Bundler's gem packaging tasks and defines the
# unit-test task, which is also the default task.
require 'bundler/gem_tasks'
require 'rake/testtask'

Rake::TestTask.new(:test) do |test_task|
  # Put both the test helpers and the plugin sources on the load path.
  %w[test lib].each { |dir| test_task.libs << dir }
  test_task.test_files = FileList['test/**/*_test.rb']
end

task default: :test
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,41 @@
1
# Gem specification for the Splunk HEC output plugin.
#
# File lookups (VERSION, lib/, test/) are anchored to the directory holding
# this gemspec, so the spec evaluates the same way regardless of the process
# working directory.
Gem::Specification.new do |spec|
  spec.name          = 'fluent-plugin-splunk-hec'
  # VERSION is a one-line file; strip the trailing newline before use.
  spec.version       = File.read(File.expand_path('VERSION', __dir__)).strip
  spec.authors       = ['Zhimin (Gimi) Liang']
  spec.email         = ['zliang@splunk.com']

  spec.summary       = %q{Fluentd plugin for Splunk HEC.}
  spec.description   = %q{A fluentd output plugin created by Splunk that writes events to splunk indexers over HTTP Event Collector API.}
  spec.homepage      = 'https://github.com/splunk/fluent-plugin-splunk-hec'
  spec.license       = 'Apache-2.0'

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  spec.require_paths = ['lib']

  # Globs must yield paths relative to the gem root, so evaluate them from there.
  Dir.chdir(__dir__) do
    spec.test_files = Dir.glob('test/**/*.rb')
    spec.files      = %w[
      CODE_OF_CONDUCT.md README.md LICENSE
      fluent-plugin-splunk-hec.gemspec
      Gemfile Gemfile.lock
      Rakefile VERSION
    ] + Dir.glob('lib/**/*').reject { |path| File.directory?(path) }
  end

  spec.required_ruby_version = '>= 2.4.0'

  spec.add_runtime_dependency 'fluentd', '~> 1.0'
  spec.add_runtime_dependency 'multi_json', '~> 1.13'
  spec.add_runtime_dependency 'net-http-persistent', '~> 3.0'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'test-unit', '~> 3.0' # required by fluent/test.rb
  spec.add_development_dependency 'minitest', '~> 5.0'
  spec.add_development_dependency 'webmock', '~> 3.2'
end
@@ -0,0 +1,395 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fluent/plugin/output"
4
+ require "fluent/plugin/formatter"
5
+
6
+ require 'openssl'
7
+ require 'multi_json'
8
+ require 'net/http/persistent'
9
+
10
+ module Fluent::Plugin
11
+ class SplunkHecOutput < Fluent::Plugin::Output
12
# Register this class under the output type name `splunk_hec`.
Fluent::Plugin.register_output('splunk_hec', self)

helpers :formatter

# Both constants are loaded lazily on first reference.
autoload :VERSION, 'fluent/plugin/out_splunk_hec/version'
autoload :MatchFormatter, 'fluent/plugin/out_splunk_hec/match_formatter'

# Field names that `check_conflict` and `prepare_key_fields` iterate over;
# each is looked up as `@<name>` and `@<name>_key`.
KEY_FIELDS = %w[index host source sourcetype metric_name metric_value].freeze
# Literal value that makes a key field resolve to the event tag.
TAG_PLACEHOLDER = '${tag}'.freeze

# Frozen hash whose default block warns about the requested key and returns
# the hash itself.
MISSING_FIELD = Hash.new do |_hash, key|
  $log.warn "expected field #{key} but it's missing" if defined?($log)
  MISSING_FIELD
end.freeze

# --- HEC endpoint -----------------------------------------------------------

desc 'Protocol to use to call HEC API.'
config_param :protocol, :enum, list: %i[http https], default: :https

desc 'The hostname/IP to HEC, or HEC load balancer.'
config_param :hec_host, :string

desc 'The port number to HEC, or HEC load balancer.'
config_param :hec_port, :integer, default: 8088

desc 'The HEC token.'
config_param :hec_token, :string

# --- TLS --------------------------------------------------------------------

desc 'The path to a file containing a PEM-format CA certificate for this client.'
config_param :client_cert, :string, default: nil

desc 'The private key for this client.'
config_param :client_key, :string, default: nil

desc 'The path to a file containing a PEM-format CA certificate.'
config_param :ca_file, :string, default: nil

desc 'The path to a directory containing CA certificates in PEM format.'
config_param :ca_path, :string, default: nil

desc 'List of SSL ciphers allowed.'
config_param :ssl_ciphers, :array, default: nil

desc 'Indicates if insecure SSL connection is allowed.'
config_param :insecure_ssl, :bool, default: false

# --- Event metadata ---------------------------------------------------------

desc 'Type of data sending to Splunk, `event` or `metric`. `metric` type is supported since Splunk 7.0. To use `metric` type, make sure the index is a metric index.'
config_param :data_type, :enum, list: %i[event metric], default: :event

desc 'The Splunk index to index events. When not set, will be decided by HEC. This is exclusive with `index_key`'
config_param :index, :string, default: nil

desc 'Field name to contain Splunk index name. This is exclusive with `index`.'
config_param :index_key, :string, default: nil

desc 'The host field for events, by default it uses the hostname of the machine that runnning fluentd. This is exclusive with `host_key`.'
config_param :host, :string, default: nil

desc 'Field name to contain host. This is exclusive with `host`.'
config_param :host_key, :string, default: nil

desc 'The source field for events, when not set, will be decided by HEC. This is exclusive with `source_key`.'
config_param :source, :string, default: nil

desc 'Field name to contain source. This is exclusive with `source`.'
config_param :source_key, :string, default: nil

desc 'The sourcetype field for events, when not set, will be decided by HEC. This is exclusive with `sourcetype_key`.'
config_param :sourcetype, :string, default: nil

desc 'Field name to contain sourcetype. This is exclusive with `sourcetype`.'
config_param :sourcetype_key, :string, default: nil

# --- Metrics ----------------------------------------------------------------

desc 'When `data_type` is set to "metric", by default it will treat every key-value pair in the income event as a metric name-metric value pair. Set `metrics_from_event` to `false` to disable this behavior and use `metric_name_key` and `metric_value_key` to define metrics.'
config_param :metrics_from_event, :bool, default: true

desc 'Field name to contain metric name. This is exclusive with `metrics_from_event`, when this is set, `metrics_from_event` will be set to `false`.'
config_param :metric_name_key, :string, default: nil

desc 'Field name to contain metric value, this is required when `metric_name_key` is set.'
config_param :metric_value_key, :string, default: nil

desc 'When set to true, all fields defined in `index_key`, `host_key`, `source_key`, `sourcetype_key`, `metric_name_key`, `metric_value_key` will not be removed from the original event.'
config_param :keep_keys, :bool, default: false

# --- Sections ---------------------------------------------------------------

desc 'Define index-time fields for event data type, or metric dimensions for metric data type. Null value fields will be removed.'
config_section :fields, init: false, multi: false, required: false do
  # this is blank on purpose
end

config_section :format do
  config_set_default :usage, '**'
  config_set_default :@type, 'json'
  config_set_default :add_newline, false
end

# --- Encoding ---------------------------------------------------------------

desc <<~DESC
  Whether to allow non-UTF-8 characters in user logs. If set to true, any
  non-UTF-8 character would be replaced by the string specified by
  `non_utf8_replacement_string`. If set to false, any non-UTF-8 character
  would trigger the plugin to error out.
DESC
config_param :coerce_to_utf8, :bool, default: true

desc <<~DESC
  If `coerce_to_utf8` is set to true, any non-UTF-8 character would be
  replaced by the string specified here.
DESC
config_param :non_utf8_replacement_string, :string, default: ' '
120
+
121
# Sets up per-instance state: no extra fields yet, a one-slot queue that
# hands chunks from `try_write` to the worker thread, and this machine's
# hostname as the fallback `host` value.
def initialize
  super
  @extra_fields = nil
  @chunk_queue = SizedQueue.new(1)
  @default_host = Socket.gethostname
end
127
+
128
# Validates the configuration and prepares everything `format` needs.
#
# Runs the validation/setup helpers in order, then builds one MatchFormatter
# per `<format>` section.
#
# @param conf the plugin configuration element handed over by Fluentd
def configure(conf)
  super

  check_conflict
  check_metric_configs
  construct_api
  prepare_key_fields
  configure_fields(conf)
  pick_custom_format_method

  # @formatter_configs is from formatter helper
  @formatters = @formatter_configs.map do |section|
    MatchFormatter.new(section.usage, formatter_create(usage: section.usage))
  end
end
143
+
144
# Starts the plugin, then launches the worker thread that drains the chunk
# queue and posts to HEC.
def start
  super
  start_worker_threads
end
148
+
149
# Placeholder formatter that returns nil. `configure` swaps it for either
# `format_event` or `format_metric` via `pick_custom_format_method`.
def format(tag, time, record)
  # this method will be replaced in `configure`
  nil
end
152
+
153
# Hands a buffered chunk to the worker thread.
#
# The chunk is pushed onto the one-slot `@chunk_queue`, so this call blocks
# while the slot is still occupied. The chunk is committed later by
# `send_to_hec`.
#
# The debug message reports `chunk.bytesize` instead of reading the whole
# chunk body just to measure it; the body is read once, when it is sent.
#
# @param chunk the buffer chunk to deliver (must respond to `bytesize`)
def try_write(chunk)
  log.debug { "Received new chunk, size=#{chunk.bytesize}" }
  @chunk_queue << chunk
end
157
+
158
# Closes the chunk queue before delegating to the parent's `close`.
# `get_next_chunk` returns nil once the closed queue is empty, which ends
# the worker loop.
def close
  @chunk_queue.close
  super
end
162
+
163
# Always true: this plugin declares itself usable with multiple Fluentd workers.
def multi_workers_ready?
  true
end
166
+
167
+ private
168
+
169
# Rejects configurations that set both the literal and the `_key` variant
# of any key field (e.g. `index` and `index_key`).
#
# @raise [Fluent::ConfigError] when both variants of a field are set
def check_conflict
  KEY_FIELDS.each do |field|
    key_param = "#{field}_key"
    next unless instance_variable_get("@#{field}") && instance_variable_get("@#{key_param}")

    raise Fluent::ConfigError, "Can not set #{field} and #{key_param} at the same time."
  end
end
176
+
177
# Validates the metric-related options. Does nothing unless `data_type` is
# `:metric`.
#
# Setting `metric_name_key` forces `metrics_from_event` to false. With
# `metrics_from_event` off, both `metric_name_key` and `metric_value_key`
# must be present.
#
# @raise [Fluent::ConfigError] when a required metric key option is missing
def check_metric_configs
  return if @data_type != :metric

  @metrics_from_event = false if @metric_name_key
  return if @metrics_from_event

  problem =
    if !@metric_name_key
      "`metric_name_key` is required when `metrics_from_event` is `false`."
    elsif !@metric_value_key
      "`metric_value_key` is required when `metric_name_key` is set."
    end

  raise Fluent::ConfigError, problem if problem
end
188
+
189
# Replaces each configured key field (`@index`, `@host`, ...) with a lambda
# `(tag, record) -> value`, so the formatters can resolve every field the
# same way.
#
# * `<field>_key` set: the lambda looks the value up in the record, following
#   a dot-separated path (`a.b.c`). Unless `keep_keys` is true, the leaf key
#   is also removed from the record.
# * `<field>` set to TAG_PLACEHOLDER: the lambda returns the event tag.
# * `<field>` set to anything else: the lambda returns that constant.
# * neither set: the field is left untouched.
#
# Lookups are nil-tolerant: when any step of the path is missing or is not a
# Hash, the lambda returns nil instead of raising NoMethodError from
# `nil[key]`.
def prepare_key_fields
  # Walks `keys` down from `record`; yields nil as soon as a step is not a Hash.
  descend = lambda do |record, keys|
    keys.inject(record) { |node, key| node.is_a?(Hash) ? node[key] : nil }
  end

  KEY_FIELDS.each do |f|
    key_path = instance_variable_get "@#{f}_key"

    if key_path
      attrs = key_path.split('.').freeze
      parents = attrs[0...-1].freeze
      leaf = attrs.last

      resolver =
        if @keep_keys
          ->(_, record) { descend.call(record, attrs) }
        else
          lambda do |_, record|
            holder = descend.call(record, parents)
            holder.is_a?(Hash) ? holder.delete(leaf) : nil
          end
        end
      instance_variable_set "@#{f}", resolver
      next
    end

    v = instance_variable_get "@#{f}"
    next unless v

    if v == TAG_PLACEHOLDER
      instance_variable_set "@#{f}", ->(tag, _) { tag }
    else
      instance_variable_set "@#{f}", ->(_, _) { v }
    end
  end
end
213
+
214
+ # <fields> directive, which defines:
215
+ # * when data_type is event, index-time fields
216
+ # * when data_type is metric, metric dimensions
217
# Handles the <fields> directive, which defines:
# * when data_type is event, index-time fields
# * when data_type is metric, metric dimensions
#
# Builds `@extra_fields`, a hash from output field name to record key. An
# entry with an empty value uses its own name as the record key. Leaves
# `@extra_fields` unchanged when there is no `<fields>` section.
#
# @param conf the plugin configuration element
def configure_fields(conf)
  # This loop looks dumb, but it is used to suppress the unused parameter configuration warning
  # Learned from `filter_record_transformer`.
  conf.elements.each do |element|
    next unless element.name == 'fields'

    element.each_pair { |key, _value| element.has_key?(key) }
  end

  return unless @fields

  @extra_fields = @fields.corresponding_config_element.each_with_object({}) do |(name, field), mapping|
    mapping[name] = field.empty? ? name : field
  end
end
230
+
231
# Installs the real `format` implementation on this instance:
# `format_event` when `data_type` is `:event`, `format_metric` otherwise.
def pick_custom_format_method
  implementation = @data_type == :event ? :format_event : :format_metric
  define_singleton_method :format, method(implementation)
end
238
+
239
# Serialises one record as an HEC event payload (JSON string).
#
# The payload carries `host` and `time` plus any of `index`, `source` and
# `sourcetype` that resolve to a non-nil value. When `<fields>` is
# configured, the listed record keys are copied into `fields` (nil values
# dropped) and removed from the record. The remaining record, optionally
# rewritten by the first formatter matching the tag, becomes `event`.
#
# @param tag [String] the event tag
# @param time event time; only `to_i` is used
# @param record [Hash] the event record (mutated by key/field extraction)
# @return [String] JSON text
def format_event(tag, time, record)
  payload = {
    host: @host ? @host.(tag, record) : @default_host,
    time: time.to_i
  }
  payload[:index] = @index.(tag, record) if @index
  payload[:source] = @source.(tag, record) if @source
  payload[:sourcetype] = @sourcetype.(tag, record) if @sourcetype

  # delete nil fields otherwise will get format error from HEC
  %i[host index source sourcetype].each do |name|
    payload.delete(name) if payload[name].nil?
  end

  if @extra_fields
    indexed = {}
    @extra_fields.each do |name, field|
      value = record[field]
      indexed[name] = value unless value.nil?
    end
    payload[:fields] = indexed
    # if a field is already in indexed fields, then remove it from the original event
    @extra_fields.each_value { |field| record.delete field }
  end

  matched = @formatters.find { |f| f.match? tag }
  record = matched.format(tag, time, record) if matched

  payload[:event] = convert_to_utf8 record
  MultiJson.dump(payload)
end
263
+
264
# Serialises one record as one or more HEC metric payloads (JSON text).
#
# Common envelope: `host`, `time`, `event: 'metric'`, plus `index`, `source`
# and `sourcetype` when configured. Envelope entries that resolve to nil are
# removed, matching `format_event`, whose comment notes HEC reports a format
# error otherwise.
#
# * `metrics_from_event` false: emits a single payload whose `fields` holds
#   `metric_name`/`_value` from the configured key fields, plus either the
#   `<fields>` mappings or, without `<fields>`, the rest of the record.
#   Nil-valued fields are dropped.
# * `metrics_from_event` true: emits one payload per key/value pair in the
#   record, concatenated with no separator.
#
# @param tag [String] the event tag
# @param time event time; only `to_i` is used
# @param record [Hash] the event record (key-field extraction may mutate it)
# @return [String] JSON text
def format_metric(tag, time, record)
  payload = {
    host: @host ? @host.(tag, record) : @default_host,
    time: time.to_i,
    event: 'metric'
  }
  payload[:index] = @index.(tag, record) if @index
  payload[:source] = @source.(tag, record) if @source
  payload[:sourcetype] = @sourcetype.(tag, record) if @sourcetype

  # A key field missing from the record resolves to nil; drop it instead of sending null.
  %i[host index source sourcetype].each { |f| payload.delete f if payload[f].nil? }

  unless @metrics_from_event
    fields = {
      metric_name: @metric_name.(tag, record),
      _value: @metric_value.(tag, record)
    }

    if @extra_fields
      fields.update @extra_fields.map { |name, field| [name, record[field]] }.to_h
    else
      fields.update record
    end

    fields.compact!

    payload[:fields] = convert_to_utf8 fields

    return MultiJson.dump(payload)
  end

  # when metrics_from_event is true, generate one metric event for each key-value in record
  record.map { |key, value|
    MultiJson.dump({ fields: { metric_name: key, _value: value } }.merge!(payload))
  }.join
end
300
+
301
# Builds the HEC collector endpoint URI from `protocol`, `hec_host` and
# `hec_port` and stores it in `@hec_api`.
#
# @raise [Fluent::ConfigError] when the pieces do not form a valid URI
def construct_api
  begin
    endpoint = "#{@protocol}://#{@hec_host}:#{@hec_port}/services/collector"
    @hec_api = URI(endpoint)
  rescue StandardError
    raise Fluent::ConfigError, "hec_host (#{@hec_host}) and/or hec_port (#{@hec_port}) are invalid."
  end
end
306
+
307
# Spawns the worker thread, named after the HEC endpoint. The thread opens
# one connection and keeps sending queued chunks until `get_next_chunk`
# returns nil.
def start_worker_threads
  thread_create :"hec_worker_#{@hec_api}" do
    connection = new_connection
    loop do
      chunk = get_next_chunk
      break unless chunk

      send_to_hec connection, chunk
    end
  end
end
315
+
316
# Takes the next chunk off the queue.
#
# While the queue is open this blocks until a chunk arrives. Once the queue
# is closed it pops without blocking, and the ThreadError raised for an
# empty queue is turned into nil.
#
# @return the next chunk, or nil when the closed queue is empty
def get_next_chunk
  non_blocking = @chunk_queue.closed?
  begin
    @chunk_queue.pop(non_blocking)
  rescue ThreadError # see SizedQueue#pop doc
    nil
  end
end
321
+
322
# Creates the persistent HTTP connection used to talk to HEC.
#
# Applies the TLS options (peer verification unless `insecure_ssl`, optional
# client certificate/key, CA file/path, cipher list) and the headers added
# to every request (content type, user agent, HEC token authorization).
#
# The client key is parsed with `OpenSSL::PKey.read`, which detects the key
# type, so EC and DSA keys work as well as RSA keys.
#
# @return [Net::HTTP::Persistent] the configured connection
def new_connection
  Net::HTTP::Persistent.new.tap do |c|
    c.verify_mode = @insecure_ssl ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
    c.cert = OpenSSL::X509::Certificate.new(File.read(@client_cert)) if @client_cert
    c.key = OpenSSL::PKey.read(File.read(@client_key)) if @client_key
    c.ca_file = @ca_file
    c.ca_path = @ca_path
    c.ciphers = @ssl_ciphers

    c.override_headers['Content-Type'] = 'application/json'
    c.override_headers['User-Agent'] = "fluent-plugin-splunk_hec_out/#{VERSION}"
    c.override_headers['Authorization'] = "Splunk #{@hec_token}"
  end
end
336
+
337
# Posts one chunk to the HEC endpoint and commits it.
#
# * 5xx response: raises, leaving the chunk uncommitted.
# * any other response: the chunk is committed; a non-2xx response is
#   additionally logged as an error.
#
# @param http the connection returned by `new_connection`
# @param chunk the buffer chunk to send
# @raise [RuntimeError] on a 5xx response
def send_to_hec(http, chunk)
  request = Net::HTTP::Post.new(@hec_api.request_uri)
  request.body = chunk.read
  log.debug { "Sending #{request.body.bytesize} bytes to Splunk." }

  log.trace { "POST #{@hec_api} body=#{request.body}" }
  response = http.request(@hec_api, request)
  log.debug { "[Response] POST #{@hec_api}: #{response.inspect}" }

  status = response.code

  # raise Exception to utilize Fluentd output plugin retry mechanism
  raise "Server error for POST #{@hec_api}, response: #{response.body}" if status.start_with?('5')

  # For both success response (2xx) and client errors (4xx), we will consume the chunk.
  # Because there probably a bug in the code if we get 4xx errors, retry won't do any good.
  commit_write(chunk.unique_id)
  return if status.start_with?('2')

  log.error "Failed POST to #{@hec_api}, response: #{response.body}"
  log.debug { "Failed request body: #{request.body}" }
end
357
+
358
+ # Encode as UTF-8. If 'coerce_to_utf8' is set to true in the config, any
359
+ # non-UTF-8 character would be replaced by the string specified by
360
+ # 'non_utf8_replacement_string'. If 'coerce_to_utf8' is set to false, any
361
+ # non-UTF-8 character would trigger the plugin to error out.
362
+ # Thanks to
363
+ # https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud/blob/dbc28575/lib/fluent/plugin/out_google_cloud.rb#L1284
364
# Recursively encodes a value as UTF-8.
#
# Hashes (keys and values) and arrays are rebuilt element by element. Values
# that do not respond to `encode` are returned untouched. For everything
# else:
# * `coerce_to_utf8` true: invalid or undefined characters are replaced by
#   `non_utf8_replacement_string`.
# * `coerce_to_utf8` false: a plain `encode('utf-8')` is attempted; an
#   EncodingError is logged and re-raised.
#
# Thanks to
# https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud/blob/dbc28575/lib/fluent/plugin/out_google_cloud.rb#L1284
#
# @param input the value to convert
# @return the converted value (a new Hash/Array for containers)
# @raise [EncodingError] when coercion is disabled and encoding fails
def convert_to_utf8(input)
  case input
  when Hash
    input.each_with_object({}) do |(key, value), converted|
      converted[convert_to_utf8(key)] = convert_to_utf8(value)
    end
  when Array
    input.map { |element| convert_to_utf8(element) }
  else
    return input unless input.respond_to?(:encode)

    if @coerce_to_utf8
      input.encode('utf-8', invalid: :replace, undef: :replace, replace: @non_utf8_replacement_string)
    else
      begin
        input.encode('utf-8')
      rescue EncodingError
        log.error do
          'Encountered encoding issues potentially due to non ' \
            'UTF-8 characters. To allow non-UTF-8 characters and ' \
            'replace them with spaces, please set "coerce_to_utf8" ' \
            'to true.'
        end
        raise
      end
    end
  end
end
394
+ end
395
+ end