logstash-output-elasticsearch 10.8.1-java → 11.0.0-java

This diff shows the changes between publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +1 -1
  4. data/docs/index.asciidoc +282 -114
  5. data/lib/logstash/outputs/elasticsearch.rb +125 -65
  6. data/lib/logstash/outputs/elasticsearch/data_stream_support.rb +233 -0
  7. data/lib/logstash/outputs/elasticsearch/http_client.rb +59 -21
  8. data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +47 -34
  9. data/lib/logstash/outputs/elasticsearch/ilm.rb +11 -12
  10. data/lib/logstash/outputs/elasticsearch/license_checker.rb +19 -22
  11. data/lib/logstash/outputs/elasticsearch/template_manager.rb +3 -5
  12. data/lib/logstash/plugin_mixins/elasticsearch/api_configs.rb +157 -153
  13. data/lib/logstash/plugin_mixins/elasticsearch/common.rb +80 -60
  14. data/logstash-output-elasticsearch.gemspec +2 -2
  15. data/spec/es_spec_helper.rb +3 -6
  16. data/spec/integration/outputs/data_stream_spec.rb +61 -0
  17. data/spec/integration/outputs/ilm_spec.rb +22 -18
  18. data/spec/integration/outputs/ingest_pipeline_spec.rb +4 -2
  19. data/spec/integration/outputs/retry_spec.rb +14 -2
  20. data/spec/integration/outputs/sniffer_spec.rb +0 -1
  21. data/spec/spec_helper.rb +14 -0
  22. data/spec/unit/http_client_builder_spec.rb +9 -9
  23. data/spec/unit/outputs/elasticsearch/data_stream_support_spec.rb +542 -0
  24. data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +1 -0
  25. data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +27 -13
  26. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +59 -41
  27. data/spec/unit/outputs/elasticsearch/template_manager_spec.rb +1 -3
  28. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +4 -5
  29. data/spec/unit/outputs/elasticsearch_spec.rb +228 -38
  30. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +1 -2
  31. data/spec/unit/outputs/error_whitelist_spec.rb +4 -3
  32. data/spec/unit/outputs/license_check_spec.rb +0 -16
  33. metadata +23 -16
@@ -3,8 +3,8 @@ require "logstash/namespace"
  require "logstash/environment"
  require "logstash/outputs/base"
  require "logstash/json"
- require "concurrent"
- require "stud/buffer"
+ require "concurrent/atomic/atomic_boolean"
+ require "stud/interval"
  require "socket" # for Socket.gethostname
  require "thread" # for safe queueing
  require "uri" # for escaping user input
@@ -92,6 +92,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    require "logstash/plugin_mixins/elasticsearch/api_configs"
    require "logstash/plugin_mixins/elasticsearch/common"
    require "logstash/outputs/elasticsearch/ilm"
+   require "logstash/outputs/elasticsearch/data_stream_support"
    require 'logstash/plugin_mixins/ecs_compatibility_support'

    # Protocol agnostic methods
@@ -106,6 +107,9 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # Generic/API config options that any document indexer output needs
    include(LogStash::PluginMixins::ElasticSearch::APIConfigs)

+   # DS support
+   include(LogStash::Outputs::ElasticSearch::DataStreamSupport)
+
    DEFAULT_POLICY = "logstash-policy"

    config_name "elasticsearch"
@@ -122,7 +126,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # would use the foo field for the action
    #
    # For more details on actions, check out the http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
-   config :action, :validate => :string, :default => "index"
+   config :action, :validate => :string # :default => "index" unless data_stream

    # The index to write events to. This can be dynamic using the `%{foo}` syntax.
    # The default value will partition your indices by day so you can more easily
@@ -247,6 +251,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # ILM policy to use, if undefined the default policy will be used.
    config :ilm_policy, :validate => :string, :default => DEFAULT_POLICY

+   attr_reader :client
    attr_reader :default_index
    attr_reader :default_ilm_rollover_alias
    attr_reader :default_template_name
@@ -257,26 +262,53 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    end

    def register
-     @template_installed = Concurrent::AtomicBoolean.new(false)
+     @after_successful_connection_done = Concurrent::AtomicBoolean.new(false)
      @stopping = Concurrent::AtomicBoolean.new(false)
-     # To support BWC, we check if DLQ exists in core (< 5.4). If it doesn't, we use nil to resort to previous behavior.
-     @dlq_writer = dlq_enabled? ? execution_context.dlq_writer : nil

      check_action_validity

+     @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts.map(&:sanitized).map(&:to_s))
+
      # the license_checking behaviour in the Pool class is externalized in the LogStash::ElasticSearchOutputLicenseChecker
      # class defined in license_check.rb. This license checking is specific to the elasticsearch output here and passed
      # to build_client down to the Pool class.
-     build_client(LicenseChecker.new(@logger))
+     @client = build_client(LicenseChecker.new(@logger))
+
+     @after_successful_connection_thread = after_successful_connection do
+       begin
+         finish_register
+         true # thread.value
+       rescue => e
+         # we do not want to halt the thread with an exception as that has consequences for LS
+         e # thread.value
+       ensure
+         @after_successful_connection_done.make_true
+       end
+     end

-     @template_installer = setup_after_successful_connection do
-       discover_cluster_uuid
-       install_template
-       setup_ilm if ilm_in_use?
+     # To support BWC, we check if DLQ exists in core (< 5.4). If it doesn't, we use nil to resort to previous behavior.
+     @dlq_writer = dlq_enabled? ? execution_context.dlq_writer : nil
+
+     if data_stream_config?
+       @event_mapper = -> (e) { data_stream_event_action_tuple(e) }
+       @event_target = -> (e) { data_stream_name(e) }
+       @index = "#{data_stream_type}-#{data_stream_dataset}-#{data_stream_namespace}".freeze # default name
+     else
+       @event_mapper = -> (e) { event_action_tuple(e) }
+       @event_target = -> (e) { e.sprintf(@index) }
      end
+
      @bulk_request_metrics = metric.namespace(:bulk_requests)
      @document_level_metrics = metric.namespace(:documents)
-     @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts.map(&:sanitized).map(&:to_s))
+   end
+
+   # @override post-register when ES connection established
+   def finish_register
+     assert_es_version_supports_data_streams if data_stream_config?
+     discover_cluster_uuid
+     install_template
+     setup_ilm if ilm_in_use?
+     super
    end

    # @override to handle proxy => '' as if none was set
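The register rewrite above replaces the old template-installer thread with a general post-connection hook: setup runs in a background thread once a connection succeeds, any failure is kept as the thread's value instead of killing the thread, and an AtomicBoolean signals completion to the write path. A minimal, self-contained sketch of that pattern (a plain Thread and placeholder setup work stand in for the plugin's after_successful_connection hook and finish_register):

    require 'concurrent/atomic/atomic_boolean'

    done = Concurrent::AtomicBoolean.new(false)

    setup_thread = Thread.new do
      begin
        # stand-in for finish_register (template install, ILM / data-stream checks, ...)
        true                       # thread.value on success
      rescue => e
        e                          # keep the failure as the thread's value, do not kill the thread
      ensure
        done.make_true             # always unblock the write path
      end
    end

    sleep(0.05) until done.true?   # the write path waits (the plugin uses stoppable_sleep)
    status = setup_thread.value
    warn "setup did not complete normally: #{status.message}" if status.is_a?(Exception)

Because the exception is returned rather than raised, the caller can keep logging the failure on every batch without the worker thread dying.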
@@ -297,46 +329,47 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base

    # Receive an array of events and immediately attempt to index them (no buffering)
    def multi_receive(events)
-     until @template_installed.true?
-       sleep 1
+     wait_for_successful_connection if @after_successful_connection_done
+     retrying_submit map_events(events)
+   end
+
+   def map_events(events)
+     events.map(&@event_mapper)
+   end
+
+   def wait_for_successful_connection
+     after_successful_connection_done = @after_successful_connection_done
+     return unless after_successful_connection_done
+     stoppable_sleep 1 until after_successful_connection_done.true?
+
+     status = @after_successful_connection_thread && @after_successful_connection_thread.value
+     if status.is_a?(Exception) # check if thread 'halted' with an error
+       # keep logging that something isn't right (from every #multi_receive)
+       @logger.error "Elasticsearch setup did not complete normally, please review previously logged errors",
+                     message: status.message, exception: status.class
+     else
+       @after_successful_connection_done = nil # do not execute __method__ again if all went well
      end
-     retrying_submit(events.map {|e| event_action_tuple(e)})
    end
+   private :wait_for_successful_connection

    def close
      @stopping.make_true if @stopping
-     stop_template_installer
+     stop_after_successful_connection_thread
      @client.close if @client
    end

-   # not private because used by ILM specs
-   def stop_template_installer
-     @template_installer.join unless @template_installer.nil?
+   private
+
+   def stop_after_successful_connection_thread
+     @after_successful_connection_thread.join unless @after_successful_connection_thread.nil?
    end

-   # not private for elasticsearch_spec.rb
-   # Convert the event into a 3-tuple of action, params, and event
+   # Convert the event into a 3-tuple of action, params and event hash
    def event_action_tuple(event)
-     action = event.sprintf(@action)
-
-     params = {
-       :_id => @document_id ? event.sprintf(@document_id) : nil,
-       :_index => event.sprintf(@index),
-       routing_field_name => @routing ? event.sprintf(@routing) : nil
-     }
-
+     params = common_event_params(event)
      params[:_type] = get_event_type(event) if use_event_type?(nil)

-     if @pipeline
-       value = event.sprintf(@pipeline)
-       # convention: empty string equates to not using a pipeline
-       # this is useful when using a field reference in the pipeline setting, e.g.
-       # elasticsearch {
-       #   pipeline => "%{[@metadata][pipeline]}"
-       # }
-       params[:pipeline] = value unless value.empty?
-     end
-
      if @parent
        if @join_field
          join_value = event.get(@join_field)
@@ -348,26 +381,40 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
        end
      end

+     action = event.sprintf(@action || 'index')
+
      if action == 'update'
        params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @upsert != ""
        params[:_script] = event.sprintf(@script) if @script != ""
        params[retry_on_conflict_action_name] = @retry_on_conflict
      end

-     if @version
-       params[:version] = event.sprintf(@version)
-     end
-
-     if @version_type
-       params[:version_type] = event.sprintf(@version_type)
-     end
+     params[:version] = event.sprintf(@version) if @version
+     params[:version_type] = event.sprintf(@version_type) if @version_type

-     [action, params, event]
+     [action, params, event.to_hash]
    end

-   # not private for elasticsearch_spec.rb
-   def retry_on_conflict_action_name
-     maximum_seen_major_version >= 7 ? :retry_on_conflict : :_retry_on_conflict
+   # @return Hash (initial) parameters for given event
+   # @private shared event params factory between index and data_stream mode
+   def common_event_params(event)
+     params = {
+       :_id => @document_id ? event.sprintf(@document_id) : nil,
+       :_index => @event_target.call(event),
+       routing_field_name => @routing ? event.sprintf(@routing) : nil
+     }
+
+     if @pipeline
+       value = event.sprintf(@pipeline)
+       # convention: empty string equates to not using a pipeline
+       # this is useful when using a field reference in the pipeline setting, e.g.
+       # elasticsearch {
+       #   pipeline => "%{[@metadata][pipeline]}"
+       # }
+       params[:pipeline] = value unless value.empty?
+     end
+
+     params
    end

    @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-elasticsearch-/ }
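For reference, a rough illustration (hypothetical values, plain Ruby objects rather than a LogStash::Event) of the 3-tuple shape that event_action_tuple now yields in classic index mode; note the third element is the event hash, no longer the event object:

    tuple = [
      'index',                                                             # action (default when @action is unset)
      { :_id => nil, :_index => 'logstash-2021.02.04', :routing => nil },  # common_event_params (ES >= 6 uses :routing)
      { 'message' => 'hello', '@timestamp' => '2021-02-04T00:00:00.000Z' } # event.to_hash
    ]

    action, params, event_hash = tuple
    puts "#{action} into #{params[:_index]}: #{event_hash['message']}"

In data-stream mode the same params factory is used, but the action is always 'create' and the _index comes from the resolved data stream name.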
@@ -377,38 +424,47 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
      require "logstash/outputs/elasticsearch/#{name}"
    end

-   private
+   def retry_on_conflict_action_name
+     maximum_seen_major_version >= 7 ? :retry_on_conflict : :_retry_on_conflict
+   end

    def routing_field_name
      maximum_seen_major_version >= 6 ? :routing : :_routing
    end

    # Determine the correct value for the 'type' field for the given event
-   DEFAULT_EVENT_TYPE_ES6="doc".freeze
-   DEFAULT_EVENT_TYPE_ES7="_doc".freeze
+   DEFAULT_EVENT_TYPE_ES6 = "doc".freeze
+   DEFAULT_EVENT_TYPE_ES7 = "_doc".freeze
+
    def get_event_type(event)
      # Set the 'type' value for the index.
      type = if @document_type
               event.sprintf(@document_type)
             else
-              if maximum_seen_major_version < 6
-                event.get("type") || DEFAULT_EVENT_TYPE_ES6
-              elsif maximum_seen_major_version == 6
+              major_version = maximum_seen_major_version
+              if major_version < 6
+                es5_event_type(event)
+              elsif major_version == 6
                 DEFAULT_EVENT_TYPE_ES6
-              elsif maximum_seen_major_version == 7
+              elsif major_version == 7
                 DEFAULT_EVENT_TYPE_ES7
               else
                 nil
               end
             end

-     if !(type.is_a?(String) || type.is_a?(Numeric))
-       @logger.warn("Bad event type! Non-string/integer type value set!", :type_class => type.class, :type_value => type.to_s, :event => event)
-     end
-
      type.to_s
    end

+   def es5_event_type(event)
+     type = event.get('type')
+     return DEFAULT_EVENT_TYPE_ES6 unless type
+     if !type.is_a?(String) && !type.is_a?(Numeric)
+       @logger.warn("Bad event type (non-string/integer type value set)", :type_class => type.class, :type_value => type, :event => event.to_hash)
+     end
+     type
+   end
+
    ##
    # WARNING: This method is overridden in a subclass in Logstash Core 7.7-7.8's monitoring,
    # where a `client` argument is both required and ignored. In later versions of
@@ -417,12 +473,15 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # @param noop_required_client [nil]: required `nil` for legacy reasons.
    # @return [Boolean]
    def use_event_type?(noop_required_client)
-     maximum_seen_major_version < 8
+     # always set type for ES <= 6
+     # for ES 7 only set it if the user defined it
+     (maximum_seen_major_version < 7) || (maximum_seen_major_version == 7 && @document_type)
    end

    def install_template
      TemplateManager.install_template(self)
-     @template_installed.make_true
+   rescue => e
+     @logger.error("Failed to install template", message: e.message, exception: e.class, backtrace: e.backtrace)
    end

    def setup_ecs_compatibility_related_defaults
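The use_event_type? change narrows when a document _type is sent: always for ES 6 and below, on ES 7 only when document_type is explicitly set, and never on ES 8 (previously ES 7 still received "_doc" by default). A small sketch of that decision, not the plugin's code (ES 5 would additionally prefer the event's own "type" field over the "doc" default):

    # Returns the document type to send, or nil when _type should be omitted.
    def doc_type_for(major_version, document_type = nil)
      return document_type if document_type && major_version <= 7 # user-defined type honoured up to ES 7
      major_version <= 6 ? 'doc' : nil                             # ES 5/6 default; ES 7+ omit _type
    end

    p doc_type_for(6)            # => "doc"
    p doc_type_for(7)            # => nil  (no _type unless document_type is set)
    p doc_type_for(7, 'my_type') # => "my_type"
    p doc_type_for(8, 'my_type') # => nil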
@@ -445,13 +504,14 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    end

    # To be overidden by the -java version
-   VALID_HTTP_ACTIONS=["index", "delete", "create", "update"]
+   VALID_HTTP_ACTIONS = ["index", "delete", "create", "update"]
    def valid_actions
      VALID_HTTP_ACTIONS
    end

    def check_action_validity
-     raise LogStash::ConfigurationError, "No action specified!" unless @action
+     return if @action.nil? # not set
+     raise LogStash::ConfigurationError, "No action specified!" if @action.empty?

      # If we're using string interpolation, we're good!
      return if @action =~ /%{.+}/
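With the hunks above, action no longer has a static default: nil now means "decide per event", an empty string is still rejected, and the effective default becomes "index" in classic mode or "create" when writing to a data stream (as the event-mapping changes earlier in this diff show). A minimal sketch of that resolution, using a hypothetical helper rather than the plugin's code:

    def effective_action(action_setting, data_stream_mode)
      return 'create' if data_stream_mode   # data streams only accept the create action
      action_setting || 'index'             # classic index mode keeps the old default
    end

    p effective_action(nil, false)      # => "index"
    p effective_action('update', false) # => "update"
    p effective_action(nil, true)       # => "create"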
@@ -0,0 +1,233 @@
+ module LogStash module Outputs class ElasticSearch
+   # DS specific behavior/configuration.
+   module DataStreamSupport
+
+     def self.included(base)
+       # Defines whether data will be indexed into an Elasticsearch data stream,
+       # `data_stream_*` settings will only be used if this setting is enabled!
+       # This setting supports values `true`, `false`, and `auto`.
+       # Defaults to `false` in Logstash 7.x and `auto` starting in Logstash 8.0.
+       base.config :data_stream, :validate => ['true', 'false', 'auto']
+
+       base.config :data_stream_type, :validate => ['logs', 'metrics', 'synthetics'], :default => 'logs'
+       base.config :data_stream_dataset, :validate => :dataset_identifier, :default => 'generic'
+       base.config :data_stream_namespace, :validate => :namespace_identifier, :default => 'default'
+
+       base.config :data_stream_sync_fields, :validate => :boolean, :default => true
+       base.config :data_stream_auto_routing, :validate => :boolean, :default => true
+
+       base.extend(Validator)
+     end
+
+     # @note assumes to be running AFTER {after_successful_connection} completed, due ES version checks
+     def data_stream_config?
+       @data_stream_config.nil? ? @data_stream_config = check_data_stream_config! : @data_stream_config
+     end
+
+     private
+
+     def data_stream_name(event)
+       data_stream = event.get('data_stream')
+       return @index if !data_stream_auto_routing || !data_stream.is_a?(Hash)
+
+       type = data_stream['type'] || data_stream_type
+       dataset = data_stream['dataset'] || data_stream_dataset
+       namespace = data_stream['namespace'] || data_stream_namespace
+       "#{type}-#{dataset}-#{namespace}"
+     end
+
+     # @param params the user configuration for the ES output
+     # @note LS initialized configuration (with filled defaults) won't detect as data-stream
+     #       compatible, only explicit (`original_params`) config should be tested.
+     # @return [TrueClass|FalseClass] whether given configuration is data-stream compatible
+     def check_data_stream_config!(params = original_params)
+       data_stream_params = params.select { |name, _| name.start_with?('data_stream_') } # exclude data_stream =>
+       invalid_data_stream_params = invalid_data_stream_params(params)
+
+       case data_stream_explicit_value
+       when false
+         if data_stream_params.any?
+           @logger.error "Ambiguous configuration; data stream settings must not be present when data streams is disabled (caused by: `data_stream => false`)", data_stream_params
+           raise LogStash::ConfigurationError, "Ambiguous configuration, please remove data stream specific settings: #{data_stream_params.keys}"
+         end
+         return false
+       when true
+         if invalid_data_stream_params.any?
+           @logger.error "Invalid data stream configuration, following parameters are not supported:", invalid_data_stream_params
+           raise LogStash::ConfigurationError, "Invalid data stream configuration: #{invalid_data_stream_params.keys}"
+         end
+         return true
+       else
+         use_data_stream = data_stream_default(data_stream_params, invalid_data_stream_params.empty?)
+         if !use_data_stream && data_stream_params.any?
+           # DS (auto) disabled but there's still some data-stream parameters (and no `data_stream => false`)
+           @logger.error "Ambiguous configuration; data stream settings are present, but data streams are not enabled", data_stream_params
+           raise LogStash::ConfigurationError, "Ambiguous configuration, please set data_stream => true " +
+                 "or remove data stream specific settings: #{data_stream_params.keys}"
+         end
+         use_data_stream
+       end
+     end
+
+     def data_stream_explicit_value
+       case @data_stream
+       when 'true'
+         return true
+       when 'false'
+         return false
+       else
+         return nil # 'auto' or not set by user
+       end
+     end
+
+     def invalid_data_stream_params(params)
+       shared_params = LogStash::PluginMixins::ElasticSearch::APIConfigs::CONFIG_PARAMS.keys.map(&:to_s)
+       params.reject do |name, value|
+         # NOTE: intentionally do not support explicit DS configuration like:
+         # - `index => ...` identifier provided by data_stream_xxx settings
+         # - `manage_template => false` implied by not setting the parameter
+         case name
+         when 'action'
+           value == 'create'
+         when 'routing', 'pipeline'
+           true
+         when 'data_stream'
+           value.to_s == 'true'
+         else
+           name.start_with?('data_stream_') ||
+               shared_params.include?(name) ||
+                   inherited_internal_config_param?(name) # 'id', 'enabled_metric' etc
+         end
+       end
+     end
+
+     def inherited_internal_config_param?(name)
+       self.class.superclass.get_config.key?(name.to_s) # superclass -> LogStash::Outputs::Base
+     end
+
+     DATA_STREAMS_ORIGIN_ES_VERSION = '7.9.0'
+
+     # @return [Gem::Version] if ES supports DS nil (or raise) otherwise
+     def assert_es_version_supports_data_streams
+       fail 'no last_es_version' unless last_es_version # assert - should not happen
+       es_version = Gem::Version.create(last_es_version)
+       if es_version < Gem::Version.create(DATA_STREAMS_ORIGIN_ES_VERSION)
+         @logger.error "Elasticsearch version does not support data streams, Logstash might end up writing to an index", es_version: es_version.version
+         # NOTE: when switching to synchronous check from register, this should be a ConfigurationError
+         raise LogStash::Error, "A data_stream configuration is only supported since Elasticsearch #{DATA_STREAMS_ORIGIN_ES_VERSION} " +
+               "(detected version #{es_version.version}), please upgrade your cluster"
+       end
+       es_version # return truthy
+     end
+
+     DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION = '8.0.0'
+
+     # when data_stream => is either 'auto' or not set
+     def data_stream_default(data_stream_params, valid_data_stream_config)
+       ds_default = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION)
+
+       if ds_default # LS 8.0
+         return false unless valid_data_stream_config
+
+         @logger.debug 'Configuration is data stream compliant'
+         return true
+       end
+
+       # LS 7.x
+       if valid_data_stream_config && !data_stream_params.any?
+         @logger.warn "Configuration is data stream compliant but due backwards compatibility Logstash 7.x will not assume " +
+                      "writing to a data-stream, default behavior will change on Logstash 8.0 " +
+                      "(set `data_stream => true/false` to disable this warning)"
+       end
+       false
+     end
+
+     # an {event_action_tuple} replacement when a data-stream configuration is detected
+     def data_stream_event_action_tuple(event)
+       event_data = event.to_hash
+       data_stream_event_sync(event_data) if data_stream_sync_fields
+       ['create', common_event_params(event), event_data] # action always 'create'
+     end
+
+     DATA_STREAM_SYNC_FIELDS = [ 'type', 'dataset', 'namespace' ].freeze
+
+     def data_stream_event_sync(event_data)
+       data_stream = event_data['data_stream']
+       if data_stream.is_a?(Hash)
+         unless data_stream_auto_routing
+           sync_fields = DATA_STREAM_SYNC_FIELDS.select { |name| data_stream.key?(name) && data_stream[name] != send(:"data_stream_#{name}") }
+           if sync_fields.any? # these fields will need to be overwritten
+             info = sync_fields.inject({}) { |info, name| info[name] = data_stream[name]; info }
+             info[:event] = event_data
+             @logger.warn "Some data_stream fields are out of sync, these will be updated to reflect data-stream name", info
+
+             # NOTE: we work directly with event.to_hash data thus fine to mutate the 'data_stream' hash
+             sync_fields.each { |name| data_stream[name] = nil } # fallback to ||= bellow
+           end
+         end
+       else
+         unless data_stream.nil?
+           @logger.warn "Invalid 'data_stream' field type, due fields sync will overwrite", value: data_stream, event: event_data
+         end
+         event_data['data_stream'] = data_stream = Hash.new
+       end
+
+       data_stream['type'] ||= data_stream_type
+       data_stream['dataset'] ||= data_stream_dataset
+       data_stream['namespace'] ||= data_stream_namespace
+
+       event_data
+     end
+
+     module Validator
+
+       # @override {LogStash::Config::Mixin::validate_value} to handle custom validators
+       # @param value [Array<Object>]
+       # @param validator [nil,Array,Symbol]
+       # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value
+       # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason.
+       def validate_value(value, validator)
+         case validator
+         when :dataset_identifier then validate_dataset_identifier(value)
+         when :namespace_identifier then validate_namespace_identifier(value)
+         else super
+         end
+       end
+
+       private
+
+       def validate_dataset_identifier(value)
+         valid, value = validate_value(value, :string)
+         return false, value unless valid
+
+         validate_identifier(value)
+       end
+
+       def validate_namespace_identifier(value)
+         valid, value = validate_value(value, :string)
+         return false, value unless valid
+
+         validate_identifier(value)
+       end
+
+       def validate_identifier(value, max_size = 100)
+         if value.empty?
+           return false, "Invalid identifier - empty string"
+         end
+         if value.bytesize > max_size
+           return false, "Invalid identifier - too long (#{value.bytesize} bytes)"
+         end
+         # cannot include \, /, *, ?, ", <, >, |, ' ' (space char), ',', #, :
+         if value.match? Regexp.union(INVALID_IDENTIFIER_CHARS)
+           return false, "Invalid characters detected #{INVALID_IDENTIFIER_CHARS.inspect} are not allowed"
+         end
+         return true, value
+       end
+
+       INVALID_IDENTIFIER_CHARS = [ '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' ]
+       private_constant :INVALID_IDENTIFIER_CHARS
+
+     end
+
+   end
+ end end end
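To make the new module's behaviour concrete, here is a standalone sketch (plain hashes and hypothetical values instead of a LogStash::Event) of how auto-routing resolves the target data stream from the event's data_stream.* fields and how the sync step fills in the configured defaults:

    DEFAULTS = { 'type' => 'logs', 'dataset' => 'generic', 'namespace' => 'default' }.freeze

    # Mirrors data_stream_name: event fields win, otherwise fall back to the configured defaults.
    def data_stream_name(event, auto_routing: true, defaults: DEFAULTS)
      ds = event['data_stream']
      return defaults.values_at('type', 'dataset', 'namespace').join('-') unless auto_routing && ds.is_a?(Hash)
      %w[type dataset namespace].map { |k| ds[k] || defaults[k] }.join('-')
    end

    # Mirrors data_stream_event_sync: make sure the indexed document carries matching data_stream.* fields.
    def sync_data_stream_fields!(event, defaults: DEFAULTS)
      ds = (event['data_stream'] ||= {})
      %w[type dataset namespace].each { |k| ds[k] ||= defaults[k] }
      event
    end

    event = { 'message' => 'GET /index.html', 'data_stream' => { 'dataset' => 'nginx.access' } }
    puts data_stream_name(event)                     # => "logs-nginx.access-default"
    p sync_data_stream_fields!(event)['data_stream'] # => {"dataset"=>"nginx.access", "type"=>"logs", "namespace"=>"default"}

When auto-routing is disabled, the real module instead warns about out-of-sync data_stream fields and overwrites them with the configured type/dataset/namespace before indexing.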