logstash-output-elasticsearch 10.8.1-java → 11.0.0-java

Files changed (33)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +1 -1
  4. data/docs/index.asciidoc +282 -114
  5. data/lib/logstash/outputs/elasticsearch.rb +125 -65
  6. data/lib/logstash/outputs/elasticsearch/data_stream_support.rb +233 -0
  7. data/lib/logstash/outputs/elasticsearch/http_client.rb +59 -21
  8. data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +47 -34
  9. data/lib/logstash/outputs/elasticsearch/ilm.rb +11 -12
  10. data/lib/logstash/outputs/elasticsearch/license_checker.rb +19 -22
  11. data/lib/logstash/outputs/elasticsearch/template_manager.rb +3 -5
  12. data/lib/logstash/plugin_mixins/elasticsearch/api_configs.rb +157 -153
  13. data/lib/logstash/plugin_mixins/elasticsearch/common.rb +80 -60
  14. data/logstash-output-elasticsearch.gemspec +2 -2
  15. data/spec/es_spec_helper.rb +3 -6
  16. data/spec/integration/outputs/data_stream_spec.rb +61 -0
  17. data/spec/integration/outputs/ilm_spec.rb +22 -18
  18. data/spec/integration/outputs/ingest_pipeline_spec.rb +4 -2
  19. data/spec/integration/outputs/retry_spec.rb +14 -2
  20. data/spec/integration/outputs/sniffer_spec.rb +0 -1
  21. data/spec/spec_helper.rb +14 -0
  22. data/spec/unit/http_client_builder_spec.rb +9 -9
  23. data/spec/unit/outputs/elasticsearch/data_stream_support_spec.rb +542 -0
  24. data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +1 -0
  25. data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +27 -13
  26. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +59 -41
  27. data/spec/unit/outputs/elasticsearch/template_manager_spec.rb +1 -3
  28. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +4 -5
  29. data/spec/unit/outputs/elasticsearch_spec.rb +228 -38
  30. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +1 -2
  31. data/spec/unit/outputs/error_whitelist_spec.rb +4 -3
  32. data/spec/unit/outputs/license_check_spec.rb +0 -16
  33. metadata +23 -16
data/lib/logstash/outputs/elasticsearch.rb
@@ -3,8 +3,8 @@ require "logstash/namespace"
  require "logstash/environment"
  require "logstash/outputs/base"
  require "logstash/json"
- require "concurrent"
- require "stud/buffer"
+ require "concurrent/atomic/atomic_boolean"
+ require "stud/interval"
  require "socket" # for Socket.gethostname
  require "thread" # for safe queueing
  require "uri" # for escaping user input
@@ -92,6 +92,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    require "logstash/plugin_mixins/elasticsearch/api_configs"
    require "logstash/plugin_mixins/elasticsearch/common"
    require "logstash/outputs/elasticsearch/ilm"
+   require "logstash/outputs/elasticsearch/data_stream_support"
    require 'logstash/plugin_mixins/ecs_compatibility_support'

    # Protocol agnostic methods
@@ -106,6 +107,9 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # Generic/API config options that any document indexer output needs
    include(LogStash::PluginMixins::ElasticSearch::APIConfigs)

+   # DS support
+   include(LogStash::Outputs::ElasticSearch::DataStreamSupport)
+
    DEFAULT_POLICY = "logstash-policy"

    config_name "elasticsearch"
@@ -122,7 +126,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # would use the foo field for the action
    #
    # For more details on actions, check out the http://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html[Elasticsearch bulk API documentation]
-   config :action, :validate => :string, :default => "index"
+   config :action, :validate => :string # :default => "index" unless data_stream

    # The index to write events to. This can be dynamic using the `%{foo}` syntax.
    # The default value will partition your indices by day so you can more easily
@@ -247,6 +251,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # ILM policy to use, if undefined the default policy will be used.
    config :ilm_policy, :validate => :string, :default => DEFAULT_POLICY

+   attr_reader :client
    attr_reader :default_index
    attr_reader :default_ilm_rollover_alias
    attr_reader :default_template_name
@@ -257,26 +262,53 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    end

    def register
-     @template_installed = Concurrent::AtomicBoolean.new(false)
+     @after_successful_connection_done = Concurrent::AtomicBoolean.new(false)
      @stopping = Concurrent::AtomicBoolean.new(false)
-     # To support BWC, we check if DLQ exists in core (< 5.4). If it doesn't, we use nil to resort to previous behavior.
-     @dlq_writer = dlq_enabled? ? execution_context.dlq_writer : nil

      check_action_validity

+     @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts.map(&:sanitized).map(&:to_s))
+
      # the license_checking behaviour in the Pool class is externalized in the LogStash::ElasticSearchOutputLicenseChecker
      # class defined in license_check.rb. This license checking is specific to the elasticsearch output here and passed
      # to build_client down to the Pool class.
-     build_client(LicenseChecker.new(@logger))
+     @client = build_client(LicenseChecker.new(@logger))
+
+     @after_successful_connection_thread = after_successful_connection do
+       begin
+         finish_register
+         true # thread.value
+       rescue => e
+         # we do not want to halt the thread with an exception as that has consequences for LS
+         e # thread.value
+       ensure
+         @after_successful_connection_done.make_true
+       end
+     end

-     @template_installer = setup_after_successful_connection do
-       discover_cluster_uuid
-       install_template
-       setup_ilm if ilm_in_use?
+     # To support BWC, we check if DLQ exists in core (< 5.4). If it doesn't, we use nil to resort to previous behavior.
+     @dlq_writer = dlq_enabled? ? execution_context.dlq_writer : nil
+
+     if data_stream_config?
+       @event_mapper = -> (e) { data_stream_event_action_tuple(e) }
+       @event_target = -> (e) { data_stream_name(e) }
+       @index = "#{data_stream_type}-#{data_stream_dataset}-#{data_stream_namespace}".freeze # default name
+     else
+       @event_mapper = -> (e) { event_action_tuple(e) }
+       @event_target = -> (e) { e.sprintf(@index) }
      end
+
      @bulk_request_metrics = metric.namespace(:bulk_requests)
      @document_level_metrics = metric.namespace(:documents)
-     @logger.info("New Elasticsearch output", :class => self.class.name, :hosts => @hosts.map(&:sanitized).map(&:to_s))
+   end
+
+   # @override post-register when ES connection established
+   def finish_register
+     assert_es_version_supports_data_streams if data_stream_config?
+     discover_cluster_uuid
+     install_template
+     setup_ilm if ilm_in_use?
+     super
    end

    # @override to handle proxy => '' as if none was set
@@ -297,46 +329,47 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base

    # Receive an array of events and immediately attempt to index them (no buffering)
    def multi_receive(events)
-     until @template_installed.true?
-       sleep 1
+     wait_for_successful_connection if @after_successful_connection_done
+     retrying_submit map_events(events)
+   end
+
+   def map_events(events)
+     events.map(&@event_mapper)
+   end
+
+   def wait_for_successful_connection
+     after_successful_connection_done = @after_successful_connection_done
+     return unless after_successful_connection_done
+     stoppable_sleep 1 until after_successful_connection_done.true?
+
+     status = @after_successful_connection_thread && @after_successful_connection_thread.value
+     if status.is_a?(Exception) # check if thread 'halted' with an error
+       # keep logging that something isn't right (from every #multi_receive)
+       @logger.error "Elasticsearch setup did not complete normally, please review previously logged errors",
+                     message: status.message, exception: status.class
+     else
+       @after_successful_connection_done = nil # do not execute __method__ again if all went well
      end
-     retrying_submit(events.map {|e| event_action_tuple(e)})
    end
+   private :wait_for_successful_connection

    def close
      @stopping.make_true if @stopping
-     stop_template_installer
+     stop_after_successful_connection_thread
      @client.close if @client
    end

-   # not private because used by ILM specs
-   def stop_template_installer
-     @template_installer.join unless @template_installer.nil?
+   private
+
+   def stop_after_successful_connection_thread
+     @after_successful_connection_thread.join unless @after_successful_connection_thread.nil?
    end

-   # not private for elasticsearch_spec.rb
-   # Convert the event into a 3-tuple of action, params, and event
+   # Convert the event into a 3-tuple of action, params and event hash
    def event_action_tuple(event)
-     action = event.sprintf(@action)
-
-     params = {
-       :_id => @document_id ? event.sprintf(@document_id) : nil,
-       :_index => event.sprintf(@index),
-       routing_field_name => @routing ? event.sprintf(@routing) : nil
-     }
-
+     params = common_event_params(event)
      params[:_type] = get_event_type(event) if use_event_type?(nil)

-     if @pipeline
-       value = event.sprintf(@pipeline)
-       # convention: empty string equates to not using a pipeline
-       # this is useful when using a field reference in the pipeline setting, e.g.
-       # elasticsearch {
-       #   pipeline => "%{[@metadata][pipeline]}"
-       # }
-       params[:pipeline] = value unless value.empty?
-     end
-
      if @parent
        if @join_field
          join_value = event.get(@join_field)
@@ -348,26 +381,40 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
        end
      end

+     action = event.sprintf(@action || 'index')
+
      if action == 'update'
        params[:_upsert] = LogStash::Json.load(event.sprintf(@upsert)) if @upsert != ""
        params[:_script] = event.sprintf(@script) if @script != ""
        params[retry_on_conflict_action_name] = @retry_on_conflict
      end

-     if @version
-       params[:version] = event.sprintf(@version)
-     end
-
-     if @version_type
-       params[:version_type] = event.sprintf(@version_type)
-     end
+     params[:version] = event.sprintf(@version) if @version
+     params[:version_type] = event.sprintf(@version_type) if @version_type

-     [action, params, event]
+     [action, params, event.to_hash]
    end

-   # not private for elasticsearch_spec.rb
-   def retry_on_conflict_action_name
-     maximum_seen_major_version >= 7 ? :retry_on_conflict : :_retry_on_conflict
+   # @return Hash (initial) parameters for given event
+   # @private shared event params factory between index and data_stream mode
+   def common_event_params(event)
+     params = {
+       :_id => @document_id ? event.sprintf(@document_id) : nil,
+       :_index => @event_target.call(event),
+       routing_field_name => @routing ? event.sprintf(@routing) : nil
+     }
+
+     if @pipeline
+       value = event.sprintf(@pipeline)
+       # convention: empty string equates to not using a pipeline
+       # this is useful when using a field reference in the pipeline setting, e.g.
+       # elasticsearch {
+       #   pipeline => "%{[@metadata][pipeline]}"
+       # }
+       params[:pipeline] = value unless value.empty?
+     end
+
+     params
    end

    @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-elasticsearch-/ }
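
The `pipeline` convention carried over into `common_event_params` above is worth spelling out: the setting is sprintf-expanded per event, and an empty-string result means no ingest pipeline parameter is sent with the document. A minimal pipeline sketch, where the `[@metadata][target_pipeline]` field name and the "my-parse-pipeline" ingest pipeline are purely illustrative and not part of this change:

  filter {
    # illustrative only: events tagged "parse" get a hypothetical ingest pipeline,
    # everything else gets "" so the output sends no pipeline parameter for them
    if "parse" in [tags] {
      mutate { add_field => { "[@metadata][target_pipeline]" => "my-parse-pipeline" } }
    } else {
      mutate { add_field => { "[@metadata][target_pipeline]" => "" } }
    }
  }
  output {
    elasticsearch {
      hosts    => ["localhost:9200"]
      # expanded per event; an empty string result means the event is indexed
      # without an ingest pipeline
      pipeline => "%{[@metadata][target_pipeline]}"
    }
  }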
@@ -377,38 +424,47 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
      require "logstash/outputs/elasticsearch/#{name}"
    end

-   private
+   def retry_on_conflict_action_name
+     maximum_seen_major_version >= 7 ? :retry_on_conflict : :_retry_on_conflict
+   end

    def routing_field_name
      maximum_seen_major_version >= 6 ? :routing : :_routing
    end

    # Determine the correct value for the 'type' field for the given event
-   DEFAULT_EVENT_TYPE_ES6="doc".freeze
-   DEFAULT_EVENT_TYPE_ES7="_doc".freeze
+   DEFAULT_EVENT_TYPE_ES6 = "doc".freeze
+   DEFAULT_EVENT_TYPE_ES7 = "_doc".freeze
+
    def get_event_type(event)
      # Set the 'type' value for the index.
      type = if @document_type
               event.sprintf(@document_type)
             else
-              if maximum_seen_major_version < 6
-                event.get("type") || DEFAULT_EVENT_TYPE_ES6
-              elsif maximum_seen_major_version == 6
+              major_version = maximum_seen_major_version
+              if major_version < 6
+                es5_event_type(event)
+              elsif major_version == 6
                 DEFAULT_EVENT_TYPE_ES6
-              elsif maximum_seen_major_version == 7
+              elsif major_version == 7
                 DEFAULT_EVENT_TYPE_ES7
               else
                 nil
               end
             end

-     if !(type.is_a?(String) || type.is_a?(Numeric))
-       @logger.warn("Bad event type! Non-string/integer type value set!", :type_class => type.class, :type_value => type.to_s, :event => event)
-     end
-
      type.to_s
    end

+   def es5_event_type(event)
+     type = event.get('type')
+     return DEFAULT_EVENT_TYPE_ES6 unless type
+     if !type.is_a?(String) && !type.is_a?(Numeric)
+       @logger.warn("Bad event type (non-string/integer type value set)", :type_class => type.class, :type_value => type, :event => event.to_hash)
+     end
+     type
+   end
+
    ##
    # WARNING: This method is overridden in a subclass in Logstash Core 7.7-7.8's monitoring,
    # where a `client` argument is both required and ignored. In later versions of
@@ -417,12 +473,15 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    # @param noop_required_client [nil]: required `nil` for legacy reasons.
    # @return [Boolean]
    def use_event_type?(noop_required_client)
-     maximum_seen_major_version < 8
+     # always set type for ES <= 6
+     # for ES 7 only set it if the user defined it
+     (maximum_seen_major_version < 7) || (maximum_seen_major_version == 7 && @document_type)
    end

    def install_template
      TemplateManager.install_template(self)
-     @template_installed.make_true
+   rescue => e
+     @logger.error("Failed to install template", message: e.message, exception: e.class, backtrace: e.backtrace)
    end

    def setup_ecs_compatibility_related_defaults
@@ -445,13 +504,14 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
    end

    # To be overidden by the -java version
-   VALID_HTTP_ACTIONS=["index", "delete", "create", "update"]
+   VALID_HTTP_ACTIONS = ["index", "delete", "create", "update"]
    def valid_actions
      VALID_HTTP_ACTIONS
    end

    def check_action_validity
-     raise LogStash::ConfigurationError, "No action specified!" unless @action
+     return if @action.nil? # not set
+     raise LogStash::ConfigurationError, "No action specified!" if @action.empty?

      # If we're using string interpolation, we're good!
      return if @action =~ /%{.+}/
data/lib/logstash/outputs/elasticsearch/data_stream_support.rb (new file)
@@ -0,0 +1,233 @@
+ module LogStash module Outputs class ElasticSearch
+   # DS specific behavior/configuration.
+   module DataStreamSupport
+
+     def self.included(base)
+       # Defines whether data will be indexed into an Elasticsearch data stream,
+       # `data_stream_*` settings will only be used if this setting is enabled!
+       # This setting supports values `true`, `false`, and `auto`.
+       # Defaults to `false` in Logstash 7.x and `auto` starting in Logstash 8.0.
+       base.config :data_stream, :validate => ['true', 'false', 'auto']
+
+       base.config :data_stream_type, :validate => ['logs', 'metrics', 'synthetics'], :default => 'logs'
+       base.config :data_stream_dataset, :validate => :dataset_identifier, :default => 'generic'
+       base.config :data_stream_namespace, :validate => :namespace_identifier, :default => 'default'
+
+       base.config :data_stream_sync_fields, :validate => :boolean, :default => true
+       base.config :data_stream_auto_routing, :validate => :boolean, :default => true
+
+       base.extend(Validator)
+     end
+
+     # @note assumes to be running AFTER {after_successful_connection} completed, due ES version checks
+     def data_stream_config?
+       @data_stream_config.nil? ? @data_stream_config = check_data_stream_config! : @data_stream_config
+     end
+
+     private
+
+     def data_stream_name(event)
+       data_stream = event.get('data_stream')
+       return @index if !data_stream_auto_routing || !data_stream.is_a?(Hash)
+
+       type = data_stream['type'] || data_stream_type
+       dataset = data_stream['dataset'] || data_stream_dataset
+       namespace = data_stream['namespace'] || data_stream_namespace
+       "#{type}-#{dataset}-#{namespace}"
+     end
+
+     # @param params the user configuration for the ES output
+     # @note LS initialized configuration (with filled defaults) won't detect as data-stream
+     # compatible, only explicit (`original_params`) config should be tested.
+     # @return [TrueClass|FalseClass] whether given configuration is data-stream compatible
+     def check_data_stream_config!(params = original_params)
+       data_stream_params = params.select { |name, _| name.start_with?('data_stream_') } # exclude data_stream =>
+       invalid_data_stream_params = invalid_data_stream_params(params)
+
+       case data_stream_explicit_value
+       when false
+         if data_stream_params.any?
+           @logger.error "Ambiguous configuration; data stream settings must not be present when data streams is disabled (caused by: `data_stream => false`)", data_stream_params
+           raise LogStash::ConfigurationError, "Ambiguous configuration, please remove data stream specific settings: #{data_stream_params.keys}"
+         end
+         return false
+       when true
+         if invalid_data_stream_params.any?
+           @logger.error "Invalid data stream configuration, following parameters are not supported:", invalid_data_stream_params
+           raise LogStash::ConfigurationError, "Invalid data stream configuration: #{invalid_data_stream_params.keys}"
+         end
+         return true
+       else
+         use_data_stream = data_stream_default(data_stream_params, invalid_data_stream_params.empty?)
+         if !use_data_stream && data_stream_params.any?
+           # DS (auto) disabled but there's still some data-stream parameters (and no `data_stream => false`)
+           @logger.error "Ambiguous configuration; data stream settings are present, but data streams are not enabled", data_stream_params
+           raise LogStash::ConfigurationError, "Ambiguous configuration, please set data_stream => true " +
+               "or remove data stream specific settings: #{data_stream_params.keys}"
+         end
+         use_data_stream
+       end
+     end
+
+     def data_stream_explicit_value
+       case @data_stream
+       when 'true'
+         return true
+       when 'false'
+         return false
+       else
+         return nil # 'auto' or not set by user
+       end
+     end
+
+     def invalid_data_stream_params(params)
+       shared_params = LogStash::PluginMixins::ElasticSearch::APIConfigs::CONFIG_PARAMS.keys.map(&:to_s)
+       params.reject do |name, value|
+         # NOTE: intentionally do not support explicit DS configuration like:
+         # - `index => ...` identifier provided by data_stream_xxx settings
+         # - `manage_template => false` implied by not setting the parameter
+         case name
+         when 'action'
+           value == 'create'
+         when 'routing', 'pipeline'
+           true
+         when 'data_stream'
+           value.to_s == 'true'
+         else
+           name.start_with?('data_stream_') ||
+               shared_params.include?(name) ||
+                   inherited_internal_config_param?(name) # 'id', 'enabled_metric' etc
+         end
+       end
+     end
+
+     def inherited_internal_config_param?(name)
+       self.class.superclass.get_config.key?(name.to_s) # superclass -> LogStash::Outputs::Base
+     end
+
+     DATA_STREAMS_ORIGIN_ES_VERSION = '7.9.0'
+
+     # @return [Gem::Version] if ES supports DS nil (or raise) otherwise
+     def assert_es_version_supports_data_streams
+       fail 'no last_es_version' unless last_es_version # assert - should not happen
+       es_version = Gem::Version.create(last_es_version)
+       if es_version < Gem::Version.create(DATA_STREAMS_ORIGIN_ES_VERSION)
+         @logger.error "Elasticsearch version does not support data streams, Logstash might end up writing to an index", es_version: es_version.version
+         # NOTE: when switching to synchronous check from register, this should be a ConfigurationError
+         raise LogStash::Error, "A data_stream configuration is only supported since Elasticsearch #{DATA_STREAMS_ORIGIN_ES_VERSION} " +
+             "(detected version #{es_version.version}), please upgrade your cluster"
+       end
+       es_version # return truthy
+     end
+
+     DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION = '8.0.0'
+
+     # when data_stream => is either 'auto' or not set
+     def data_stream_default(data_stream_params, valid_data_stream_config)
+       ds_default = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION)
+
+       if ds_default # LS 8.0
+         return false unless valid_data_stream_config
+
+         @logger.debug 'Configuration is data stream compliant'
+         return true
+       end
+
+       # LS 7.x
+       if valid_data_stream_config && !data_stream_params.any?
+         @logger.warn "Configuration is data stream compliant but due backwards compatibility Logstash 7.x will not assume " +
+             "writing to a data-stream, default behavior will change on Logstash 8.0 " +
+             "(set `data_stream => true/false` to disable this warning)"
+       end
+       false
+     end
+
+     # an {event_action_tuple} replacement when a data-stream configuration is detected
+     def data_stream_event_action_tuple(event)
+       event_data = event.to_hash
+       data_stream_event_sync(event_data) if data_stream_sync_fields
+       ['create', common_event_params(event), event_data] # action always 'create'
+     end
+
+     DATA_STREAM_SYNC_FIELDS = [ 'type', 'dataset', 'namespace' ].freeze
+
+     def data_stream_event_sync(event_data)
+       data_stream = event_data['data_stream']
+       if data_stream.is_a?(Hash)
+         unless data_stream_auto_routing
+           sync_fields = DATA_STREAM_SYNC_FIELDS.select { |name| data_stream.key?(name) && data_stream[name] != send(:"data_stream_#{name}") }
+           if sync_fields.any? # these fields will need to be overwritten
+             info = sync_fields.inject({}) { |info, name| info[name] = data_stream[name]; info }
+             info[:event] = event_data
+             @logger.warn "Some data_stream fields are out of sync, these will be updated to reflect data-stream name", info
+
+             # NOTE: we work directly with event.to_hash data thus fine to mutate the 'data_stream' hash
+             sync_fields.each { |name| data_stream[name] = nil } # fallback to ||= bellow
+           end
+         end
+       else
+         unless data_stream.nil?
+           @logger.warn "Invalid 'data_stream' field type, due fields sync will overwrite", value: data_stream, event: event_data
+         end
+         event_data['data_stream'] = data_stream = Hash.new
+       end
+
+       data_stream['type'] ||= data_stream_type
+       data_stream['dataset'] ||= data_stream_dataset
+       data_stream['namespace'] ||= data_stream_namespace
+
+       event_data
+     end
+
+     module Validator
+
+       # @override {LogStash::Config::Mixin::validate_value} to handle custom validators
+       # @param value [Array<Object>]
+       # @param validator [nil,Array,Symbol]
+       # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value
+       # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason.
+       def validate_value(value, validator)
+         case validator
+         when :dataset_identifier   then validate_dataset_identifier(value)
+         when :namespace_identifier then validate_namespace_identifier(value)
+         else super
+         end
+       end
+
+       private
+
+       def validate_dataset_identifier(value)
+         valid, value = validate_value(value, :string)
+         return false, value unless valid
+
+         validate_identifier(value)
+       end
+
+       def validate_namespace_identifier(value)
+         valid, value = validate_value(value, :string)
+         return false, value unless valid
+
+         validate_identifier(value)
+       end
+
+       def validate_identifier(value, max_size = 100)
+         if value.empty?
+           return false, "Invalid identifier - empty string"
+         end
+         if value.bytesize > max_size
+           return false, "Invalid identifier - too long (#{value.bytesize} bytes)"
+         end
+         # cannot include \, /, *, ?, ", <, >, |, ' ' (space char), ',', #, :
+         if value.match? Regexp.union(INVALID_IDENTIFIER_CHARS)
+           return false, "Invalid characters detected #{INVALID_IDENTIFIER_CHARS.inspect} are not allowed"
+         end
+         return true, value
+       end
+
+       INVALID_IDENTIFIER_CHARS = [ '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' ]
+       private_constant :INVALID_IDENTIFIER_CHARS
+
+     end
+
+   end
+ end end end
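
For orientation, a hedged configuration sketch using the settings introduced above (the host URL, dataset and namespace values are placeholders, not part of the diff): with `data_stream_auto_routing` left enabled, an event's own `data_stream.*` fields may override the configured target; otherwise events are written to the data stream named `<type>-<dataset>-<namespace>`, here `logs-myapp-production`.

  output {
    elasticsearch {
      hosts       => ["https://localhost:9200"]
      data_stream => "true"
      # composes the default data stream name: logs-myapp-production
      data_stream_type      => "logs"
      data_stream_dataset   => "myapp"
      data_stream_namespace => "production"
      # defaults shown explicitly: keep the event's data_stream.* fields in sync
      # with the target, and allow per-event routing via those fields
      data_stream_sync_fields  => true
      data_stream_auto_routing => true
    }
  }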