connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
@@ -1,114 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Core
  module Jobs
    # Background consumer of connector sync jobs.
    #
    # Once subscribed, a dedicated thread wakes up every +poll_interval+
    # seconds, asks the scheduler for connectors that are ready to sync,
    # fetches their pending jobs and runs each job on a bounded thread pool.
    class Consumer
      # @param scheduler [#connector_settings] object returning an enumerable of
      #   connector settings; each item must respond to +ready_for_sync?+,
      #   +id+, +formatted+ and +index_name+
      # @param poll_interval [Numeric] seconds slept before every poll
      # @param termination_timeout [Numeric] seconds to wait for the pool on shutdown
      # @param min_threads [Integer] passed to the thread pool
      # @param max_threads [Integer] passed to the thread pool
      # @param max_queue [Integer] passed to the thread pool
      # @param idle_time [Numeric] passed to the thread pool as +idletime+
      def initialize(scheduler:, poll_interval: 3, termination_timeout: 60, min_threads: 1, max_threads: 5, max_queue: 100, idle_time: 5)
        @scheduler = scheduler
        @poll_interval = poll_interval
        @termination_timeout = termination_timeout
        @min_threads = min_threads
        @max_threads = max_threads
        @max_queue = max_queue
        @idle_time = idle_time

        @running = Concurrent::AtomicBoolean.new(false)
      end

      # Remembers the index name (used for logging and the thread name) and
      # starts the polling loop.
      #
      # @param index_name [String]
      # @return [Boolean] whatever +AtomicBoolean#make_true+ returned
      def subscribe!(index_name:)
        @index_name = index_name

        start_loop!
      end

      # @return [Boolean] true when the pool is running and the consumer has
      #   been started and not shut down
      def running?
        # @TODO check if a loop thread is alive
        pool.running? && @running.true?
      end

      # Stops the polling loop, shuts the pool down and waits up to
      # +termination_timeout+ seconds for in-flight jobs.
      def shutdown!
        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
        @running.make_false
        pool.shutdown
        pool.wait_for_termination(@termination_timeout)
        # reset pool so that a later #pool call builds a fresh executor
        @pool = nil
      end

      private

      def start_loop!
        Utility::Logger.info("Starting a new consumer for #{@index_name} index")

        # The flag must be raised BEFORE the thread is spawned: the very first
        # thing the loop does is to bail out when the flag is false, so raising
        # it afterwards lets the new thread exit immediately if it happens to
        # be scheduled first.
        started = @running.make_true

        Thread.new do
          # assign a name to the thread
          # see @TODO in #running?
          Thread.current[:name] = "consumer-group-#{@index_name}"

          loop do
            if @running.false?
              Utility::Logger.info('Shutting down the loop')
              break
            end

            sleep(@poll_interval)
            dispatch_pending_jobs
          rescue StandardError => e
            # keep the loop alive; one failed poll must not kill the consumer
            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
          end
        end

        started
      end

      # One polling iteration: find ready connectors, fetch their pending jobs
      # and post each job to the pool.
      def dispatch_pending_jobs
        Utility::Logger.debug('Getting registered connectors')

        connectors = ready_for_sync_connectors
        return unless connectors.any?

        Utility::Logger.debug("Number of available connectors: #{connectors.size}")

        # @TODO It is assumed that @index_name is used to retrieve pending jobs.
        # This will be discussed after 8.6 release
        pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
        Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")

        pending_jobs.each do |job|
          connector_settings = connectors[job.connector_id]

          pool.post { run_job(connector_settings, job) }
        end
      end

      # Runs a single job; every failure is logged and swallowed so that it
      # cannot take a pool worker down.
      def run_job(connector_settings, job)
        Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
        Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
        Core::SyncJobRunner.new(connector_settings, job).execute
      rescue Core::JobAlreadyRunningError
        Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
      rescue Core::ConnectorVersionChangedError => e
        Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
      end

      # Lazily built executor; rebuilt after #shutdown! resets it.
      def pool
        @pool ||= Concurrent::ThreadPoolExecutor.new(
          min_threads: @min_threads,
          max_threads: @max_threads,
          max_queue: @max_queue,
          fallback_policy: :abort,
          idletime: @idle_time
        )
      end

      # @return [Hash] connector id => connector settings, restricted to the
      #   connectors that report +ready_for_sync?+
      def ready_for_sync_connectors
        @scheduler.connector_settings
                  .select(&:ready_for_sync?)
                  .each_with_object({}) { |settings, by_id| by_id[settings.id] = settings }
      end
    end
  end
end
@@ -1,26 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Core
  module Jobs
    # Creates connector jobs after validating the request.
    class Producer
      # Job types that can be enqueued.
      JOB_TYPES = %i(sync).freeze

      class << self
        # Validates the arguments and delegates job creation to
        # +ElasticConnectorActions.create_job+.
        #
        # @param job_type [Symbol] must be one of JOB_TYPES
        # @param connector_settings [ConnectorSettings]
        # @return whatever +ElasticConnectorActions.create_job+ returns
        # @raise [UnsupportedJobType] when +job_type+ is not in JOB_TYPES
        # @raise [ArgumentError] when +connector_settings+ is not a ConnectorSettings
        def enqueue_job(job_type:, connector_settings:)
          unless JOB_TYPES.include?(job_type)
            raise UnsupportedJobType, "Unsupported job type #{job_type.inspect}, supported types: #{JOB_TYPES.map(&:inspect).join(', ')}"
          end

          unless connector_settings.kind_of?(ConnectorSettings)
            raise ArgumentError, "connector_settings must be a ConnectorSettings, got #{connector_settings.class}"
          end

          ElasticConnectorActions.create_job(connector_settings: connector_settings)
        end
      end
    end

    # Raised when a job of an unknown type is requested.
    class UnsupportedJobType < StandardError; end
  end
end
@@ -1,85 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- require 'json'
8
-
9
module Utility
  # Accumulates newline-delimited bulk operations in a single string buffer
  # until either an operation-count limit or a byte-size limit would be hit.
  class BulkQueue
    # Raised by #add when the item does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # Defaults: 500 items or 5MB
    #
    # @param operation_count_threshold [Integer] maximum number of operations
    # @param size_threshold [Integer] exclusive upper bound for the tracked byte size
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold.freeze
      @size_threshold = size_threshold.freeze

      reset
    end

    # Hands over everything buffered so far and starts from scratch.
    #
    # @return [String] the buffered operations and payloads
    def pop_all
      drained = @buffer
      reset
      drained
    end

    # Appends an operation line and, when given, a payload line.
    #
    # @param operation [String]
    # @param payload [String, nil]
    # @raise [QueueOverflowError] when #will_fit? rejects the item
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      operation_bytes = get_size(operation)
      payload_bytes = get_size(payload)

      @current_operation_count += 1
      @current_buffer_size += operation_bytes + payload_bytes
      @current_data_size += payload_bytes

      @buffer << operation << "\n"
      @buffer << payload << "\n" if payload
    end

    # @param operation [String]
    # @param payload [String, nil]
    # @return [Boolean] whether the item can be added without exceeding a limit
    def will_fit?(operation, payload = nil)
      next_operation_count = @current_operation_count + 1
      return false if next_operation_count > @operation_count_threshold

      projected_size = @current_buffer_size + get_size(operation) + get_size(payload)
      projected_size < @size_threshold
    end

    # @return [Hash] current operation count and tracked buffer size
    def current_stats
      {
        current_operation_count: @current_operation_count,
        current_buffer_size: @current_buffer_size
      }
    end

    private

    # Byte length of +str+; nil (or false) counts as zero bytes.
    def get_size(str)
      str ? str.bytesize : 0
    end

    # Clears the buffer and every counter.
    def reset
      @buffer = +''
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0
    end
  end
end
@@ -1,108 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'time'
10
- require 'utility/errors'
11
- require 'utility/exception_tracking'
12
-
13
module Utility
  # Tracks per-document successes and errors and raises once the error rate
  # becomes unacceptable: too many errors in a row, too many errors in total,
  # or too high an error ratio within the last +window_size+ documents.
  class ErrorMonitor
    # Base class of every error raised by the monitor.
    class MonitoringError < StandardError
      # @return [Exception, nil] the document error that tripped the monitor
      attr_accessor :tripped_by

      # @param message [String, nil]
      # @param tripped_by [Exception, nil] appended to the message when given
      def initialize(message = nil, tripped_by: nil)
        suffix = tripped_by ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
        super("#{message}#{suffix}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors [Integer] total number of errors tolerated
    # @param max_consecutive_errors [Integer] errors in a row tolerated
    # @param max_error_ratio [Float] tolerated error ratio (window and #finalize)
    # @param window_size [Integer] number of most recent documents inspected for
    #   the ratio check; 0 disables the window check
    # @param error_queue_size [Integer] number of most recent errors kept in #error_queue
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records a successfully processed document.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      # A success has to overwrite its window slot; otherwise an old error
      # stays in the "window" forever and the ratio only ever grows.
      record_in_window(false)
    end

    # Records a failed document and raises when a threshold is exceeded.
    #
    # @param error [Exception]
    # @param id [Object] identifier stored with the error (defaults to the current epoch second)
    # @raise [MaxSuccessiveErrorsExceededError, MaxErrorsExceededError, MaxErrorsInWindowExceededError]
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      # keep only the most recent errors
      @error_queue.shift if @error_queue.size > @error_queue_size
      record_in_window(true)
      @last_error = error

      raise_if_necessary
    end

    # Final check over all documents seen so far.
    #
    # @raise [MaxErrorsInWindowExceededError] when the overall error ratio
    #   exceeds +max_error_ratio+
    def finalize
      total_documents = @total_error_count + @success_count
      return unless total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio

      raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
    end

    private

    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && num_errors_in_window.fdiv(@window_size) > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # @return [Integer] number of errored documents currently in the window
    def num_errors_in_window
      @window_errors.count(true)
    end

    # Stores the outcome of the current document in the circular window and
    # advances the cursor. Does nothing when the window is disabled, which
    # also avoids a modulo-by-zero for +window_size: 0+.
    def record_in_window(errored)
      return unless @window_size > 0

      @window_errors[@window_index] = errored
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ with the last document error attached as its +cause+.
    def raise_with_last_cause(error)
      raise error, cause: @last_error
    end
  end
end
@@ -1,22 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Utility
  # Helpers for reading a connector's filtering configuration.
  class Filtering
    # Picks the single filter object out of a filtering configuration.
    #
    # For now the first element of a filtering array, or the filtering object
    # itself when it is not an array, is assumed to be the only filter.
    #
    # @param filtering [Array, Object, nil]
    # @return [Object] the filter, or an empty hash when nothing usable is present
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      candidate = filtering.is_a?(Array) ? filtering.first : filtering
      return candidate if candidate.present?

      {}
    end
  end
end