connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
@@ -1,114 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Core
  module Jobs
    # Background consumer: a single polling thread asks the scheduler for
    # connectors that are ready to sync, looks up their pending jobs and runs
    # each job on a bounded thread pool.
    class Consumer
      # @param scheduler [#connector_settings] object whose +connector_settings+
      #   yields items responding to +ready_for_sync?+ and +id+
      # @param poll_interval [Numeric] seconds slept between polling iterations
      # @param termination_timeout [Numeric] seconds +shutdown!+ waits for the pool
      # @param min_threads [Integer] forwarded to Concurrent::ThreadPoolExecutor
      # @param max_threads [Integer] forwarded to Concurrent::ThreadPoolExecutor
      # @param max_queue [Integer] forwarded to Concurrent::ThreadPoolExecutor
      # @param idle_time [Numeric] forwarded to the pool as +idletime+
      def initialize(
        scheduler:,
        poll_interval: 3,
        termination_timeout: 60,
        min_threads: 1,
        max_threads: 5,
        max_queue: 100,
        idle_time: 5
      )
        @scheduler = scheduler
        @poll_interval = poll_interval
        @termination_timeout = termination_timeout
        @min_threads = min_threads
        @max_threads = max_threads
        @max_queue = max_queue
        @idle_time = idle_time

        @running = Concurrent::AtomicBoolean.new(false)
        @loop_thread = nil
      end

      # Remembers the index name (used for logging and the thread name) and
      # starts the polling loop.
      #
      # @param index_name [String]
      def subscribe!(index_name:)
        @index_name = index_name

        start_loop!
      end

      # @return [Boolean] true while the consumer is flagged as running, its
      #   polling thread is alive and the pool accepts work
      def running?
        # The flag is checked first so that asking a stopped consumer does not
        # lazily allocate a fresh thread pool.
        @running.true? && loop_alive? && pool.running?
      end

      # Stops the polling loop and shuts the pool down, waiting up to
      # +termination_timeout+ seconds for in-flight jobs.
      def shutdown!
        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
        @running.make_false
        pool.shutdown
        pool.wait_for_termination(@termination_timeout)
        # reset pool
        @pool = nil
      end

      private

      def start_loop!
        Utility::Logger.info("Starting a new consumer for #{@index_name} index")

        # The flag must be raised BEFORE the thread is spawned: the loop checks
        # it on its very first iteration and would otherwise be able to observe
        # `false` and exit immediately.
        started = @running.make_true

        @loop_thread = Thread.new do
          # assign a name to the thread so it can be identified
          Thread.current[:name] = "consumer-group-#{@index_name}"

          loop do
            if @running.false?
              Utility::Logger.info('Shutting down the loop')
              break
            end

            sleep(@poll_interval)
            # shutdown! may have been requested while we were sleeping; do not
            # post new work (which would also re-create the pool) in that case.
            next if @running.false?

            Utility::Logger.debug('Getting registered connectors')

            connectors = ready_for_sync_connectors
            next unless connectors.any?

            Utility::Logger.debug("Number of available connectors: #{connectors.size}")

            # @TODO It is assumed that @index_name is used to retrieve pending jobs.
            # This will be discussed after 8.6 release
            pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
            Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")

            pending_jobs.each { |job| post_job(job, connectors[job.connector_id]) }
          rescue StandardError => e
            # Keep the loop alive: a failed iteration is logged and retried on
            # the next poll.
            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
          end
        end

        started
      end

      # Schedules a single sync job on the pool. Expected failures are logged at
      # info level; anything else goes through exception tracking.
      def post_job(job, connector_settings)
        pool.post do
          Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
          Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
          job_runner = Core::SyncJobRunner.new(connector_settings, job)
          job_runner.execute
        rescue Core::JobAlreadyRunningError
          Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
        rescue Core::ConnectorVersionChangedError => e
          Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
        rescue StandardError => e
          Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
        end
      end

      def loop_alive?
        !@loop_thread.nil? && @loop_thread.alive?
      end

      # Lazily built pool; +shutdown!+ resets it so a later call builds a new one.
      def pool
        @pool ||= Concurrent::ThreadPoolExecutor.new(
          min_threads: @min_threads,
          max_threads: @max_threads,
          max_queue: @max_queue,
          fallback_policy: :abort,
          idletime: @idle_time
        )
      end

      # @return [Hash] connector settings that are ready for sync, keyed by id
      def ready_for_sync_connectors
        @scheduler.connector_settings
                  .select(&:ready_for_sync?)
                  .each_with_object({}) { |settings, by_id| by_id[settings.id] = settings }
      end
    end
  end
end
@@ -1,26 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Core
  module Jobs
    # Entry point for enqueuing connector jobs.
    class Producer
      # Job types this producer accepts.
      JOB_TYPES = %i[sync].freeze

      class << self
        # Validates the arguments and delegates job creation to
        # ElasticConnectorActions.create_job.
        #
        # @param job_type [Symbol] must be one of JOB_TYPES
        # @param connector_settings [ConnectorSettings]
        # @return whatever ElasticConnectorActions.create_job returns
        # @raise [UnsupportedJobType] when +job_type+ is not in JOB_TYPES
        # @raise [ArgumentError] when +connector_settings+ is not a ConnectorSettings
        def enqueue_job(job_type:, connector_settings:)
          unless JOB_TYPES.include?(job_type)
            raise UnsupportedJobType,
                  "Unsupported job type: #{job_type.inspect}. Supported job types: #{JOB_TYPES.join(', ')}"
          end

          unless connector_settings.is_a?(ConnectorSettings)
            raise ArgumentError,
                  "connector_settings must be a ConnectorSettings, got #{connector_settings.class}"
          end

          ElasticConnectorActions.create_job(connector_settings: connector_settings)
        end
      end
    end

    # Raised when a job of an unknown type is enqueued.
    class UnsupportedJobType < StandardError; end
  end
end
@@ -1,85 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- require 'json'
8
-
9
module Utility
  # Accumulates newline-delimited bulk operations in a single string buffer
  # until either an operation-count or a byte-size threshold would be crossed.
  class BulkQueue
    # Raised by #add when the entry does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # Every operation line and every payload line is terminated by this.
    LINE_DELIMITER = "\n"

    # Defaults: 500 items or 5MB
    #
    # @param operation_count_threshold [Integer] maximum number of operations
    # @param size_threshold [Integer] exclusive upper bound for the buffer size in bytes
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns everything buffered so far and empties the queue.
    #
    # @return [String]
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, when given, its payload line.
    #
    # @param operation [String]
    # @param payload [String, nil]
    # @return [nil]
    # @raise [QueueOverflowError] when #will_fit? is false for this entry
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      @current_operation_count += 1
      @current_buffer_size += entry_size(operation, payload)
      @current_data_size += bytesize(payload)

      @buffer << operation << LINE_DELIMITER
      @buffer << payload << LINE_DELIMITER if payload

      nil
    end

    # @param operation [String]
    # @param payload [String, nil]
    # @return [Boolean] whether the entry can be added without exceeding the
    #   operation count or reaching the size threshold
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + entry_size(operation, payload) < @size_threshold
    end

    # @return [Hash] current operation count and buffer size in bytes
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Bytes the entry occupies in the buffer, INCLUDING the line delimiters, so
    # the tracked size matches what is actually handed out by #pop_all.
    def entry_size(operation, payload)
      size = bytesize(operation) + LINE_DELIMITER.bytesize
      size += bytesize(payload) + LINE_DELIMITER.bytesize if payload
      size
    end

    def bytesize(str)
      str ? str.bytesize : 0
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # Unary plus yields a mutable string even under frozen_string_literal.
      @buffer = +''
    end
  end
end
@@ -1,108 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'time'
10
- require 'utility/errors'
11
- require 'utility/exception_tracking'
12
-
13
module Utility
  # Tracks per-document successes and errors and raises once one of three
  # limits is exceeded: consecutive errors, total errors, or the error ratio
  # within a sliding window of the most recent documents.
  class ErrorMonitor
    # Base class for the limits below; carries the error that tripped it.
    class MonitoringError < StandardError
      attr_accessor :tripped_by

      # @param message [String, nil]
      # @param tripped_by [Exception, nil] error that caused the limit to trip
      def initialize(message = nil, tripped_by: nil)
        suffix = tripped_by ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
        super("#{message}#{suffix}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors [Integer] total errors tolerated
    # @param max_consecutive_errors [Integer] errors in a row tolerated
    # @param max_error_ratio [Float] tolerated error ratio (window and overall)
    # @param window_size [Integer] number of recent documents in the sliding
    #   window; 0 disables the window check
    # @param error_queue_size [Integer] how many recent errors are kept in #error_queue
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records a successfully processed document.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      record_in_window(false)
    end

    # Records a failed document and raises if a limit is now exceeded.
    #
    # @param error [Exception]
    # @param id identifier stored with the queued error (defaults to the current epoch seconds)
    # @raise [MaxSuccessiveErrorsExceededError, MaxErrorsExceededError, MaxErrorsInWindowExceededError]
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      # keep only the most recent @error_queue_size errors
      @error_queue.shift if @error_queue.size > @error_queue_size
      record_in_window(true)
      @last_error = error

      raise_if_necessary
    end

    # Checks the overall error ratio once processing is done.
    #
    # @raise [MaxErrorsInWindowExceededError] when errors / total documents
    #   exceeds the configured ratio
    def finalize
      total_documents = @total_error_count + @success_count
      return if total_documents.zero?
      return unless @total_error_count.to_f / total_documents > @max_error_ratio

      raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
    end

    private

    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size.positive? && num_errors_in_window.to_f / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # @return [Integer] number of error marks currently in the window
    def num_errors_in_window
      @window_errors.count(true)
    end

    # Writes the outcome of the latest document into the current window slot
    # and advances the slot. Successes must overwrite the slot too, otherwise an
    # old error mark is never evicted and the window stops sliding.
    def record_in_window(is_error)
      return unless @window_size.positive? # avoids modulo by zero when the window is disabled

      @window_errors[@window_index] = is_error
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ with the last noted error attached as its cause.
    def raise_with_last_cause(error)
      cause = @last_error.is_a?(Exception) ? @last_error : nil
      raise error, cause: cause
    end
  end
end
@@ -1,22 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Utility
  # Helpers for reading the filtering configuration.
  class Filtering
    # Picks the single filter object out of a filtering value.
    #
    # For now the first element of a filtering array, or the filtering object
    # itself when it is not an array, is assumed to be the only filter.
    #
    # @param filtering [Array, Object, nil]
    # @return the selected filter, or an empty Hash when nothing usable is present
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      candidate =
        case filtering
        when Array then filtering.first
        else filtering
        end

      return candidate if candidate.present?

      {}
    end
  end
end