connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +0 -4
- data/lib/app/dispatcher.rb +17 -42
- data/lib/app/preflight_check.rb +0 -11
- data/lib/connectors/base/connector.rb +14 -43
- data/lib/connectors/example/connector.rb +0 -6
- data/lib/connectors/gitlab/connector.rb +1 -6
- data/lib/connectors/mongodb/connector.rb +43 -47
- data/lib/connectors/sync_status.rb +1 -6
- data/lib/core/configuration.rb +1 -3
- data/lib/core/connector_settings.rb +16 -52
- data/lib/core/elastic_connector_actions.rb +59 -320
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
- data/lib/core/scheduler.rb +10 -40
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +16 -72
- data/lib/core.rb +0 -4
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/errors.rb +12 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +4 -11
- metadata +9 -27
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
- data/lib/connectors/tolerable_error_helper.rb +0 -43
- data/lib/core/connector_job.rb +0 -210
- data/lib/core/filtering/post_process_engine.rb +0 -39
- data/lib/core/filtering/post_process_result.rb +0 -27
- data/lib/core/filtering/simple_rule.rb +0 -141
- data/lib/core/filtering/validation_job_runner.rb +0 -53
- data/lib/core/filtering/validation_status.rb +0 -17
- data/lib/core/filtering.rb +0 -17
- data/lib/core/ingestion/es_sink.rb +0 -118
- data/lib/core/jobs/consumer.rb +0 -114
- data/lib/core/jobs/producer.rb +0 -26
- data/lib/utility/bulk_queue.rb +0 -85
- data/lib/utility/error_monitor.rb +0 -108
- data/lib/utility/filtering.rb +0 -22
data/lib/core/jobs/consumer.rb
DELETED
@@ -1,114 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-module Core
-  module Jobs
-    class Consumer
-      def initialize(scheduler:, poll_interval: 3, termination_timeout: 60, min_threads: 1, max_threads: 5, max_queue: 100, idle_time: 5)
-        @scheduler = scheduler
-        @poll_interval = poll_interval
-        @termination_timeout = termination_timeout
-        @min_threads = min_threads
-        @max_threads = max_threads
-        @max_queue = max_queue
-        @idle_time = idle_time
-
-        @running = Concurrent::AtomicBoolean.new(false)
-      end
-
-      def subscribe!(index_name:)
-        @index_name = index_name
-
-        start_loop!
-      end
-
-      def running?
-        # @TODO check if a loop thread is alive
-        pool.running? && @running.true?
-      end
-
-      def shutdown!
-        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
-        @running.make_false
-        pool.shutdown
-        pool.wait_for_termination(@termination_timeout)
-        # reset pool
-        @pool = nil
-      end
-
-      private
-
-      def start_loop!
-        Utility::Logger.info("Starting a new consumer for #{@index_name} index")
-
-        Thread.new do
-          # assign a name to the thread
-          # see @TODO in #self.running?
-          Thread.current[:name] = "consumer-group-#{@index_name}"
-
-          loop do
-            if @running.false?
-              Utility::Logger.info('Shutting down the loop')
-              break
-            end
-
-            sleep(@poll_interval)
-            Utility::Logger.debug('Getting registered connectors')
-
-            connectors = ready_for_sync_connectors
-            next unless connectors.any?
-
-            Utility::Logger.debug("Number of available connectors: #{connectors.size}")
-
-            # @TODO It is assumed that @index_name is used to retrive pending jobs.
-            # This will be discussed after 8.6 release
-            pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
-            Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
-
-            pending_jobs.each do |job|
-              connector_settings = connectors[job.connector_id]
-
-              pool.post do
-                Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
-                Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
-                job_runner = Core::SyncJobRunner.new(connector_settings, job)
-                job_runner.execute
-              rescue Core::JobAlreadyRunningError
-                Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
-              rescue Core::ConnectorVersionChangedError => e
-                Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
-              rescue StandardError => e
-                Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
-              end
-            end
-          rescue StandardError => e
-            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
-          end
-        end
-
-        @running.make_true
-      end
-
-      def pool
-        @pool ||= Concurrent::ThreadPoolExecutor.new(
-          min_threads: @min_threads,
-          max_threads: @max_threads,
-          max_queue: @max_queue,
-          fallback_policy: :abort,
-          idletime: @idle_time
-        )
-      end
-
-      def ready_for_sync_connectors
-        @scheduler.connector_settings
-          .select(&:ready_for_sync?)
-          .inject({}) { |memo, cs| memo.merge(cs.id => cs) }
-      end
-    end
-  end
-end
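Note: the removed Consumer polled the scheduler for connectors that were ready to sync and ran pending jobs on a Concurrent::ThreadPoolExecutor. A minimal usage sketch, inferred only from the public methods visible above; the scheduler object and index name are assumptions:

# Illustrative only: wiring for the removed Core::Jobs::Consumer, based on the API in the diff above.
consumer = Core::Jobs::Consumer.new(
  scheduler: scheduler,   # assumed: an object exposing #connector_settings, e.g. the gem's scheduler
  poll_interval: 3,       # seconds between polls for pending jobs
  max_threads: 5          # upper bound for the thread pool
)
consumer.subscribe!(index_name: 'example-connectors-index')  # hypothetical index name; starts the polling loop
consumer.running?   # => true once the loop thread and pool are up
consumer.shutdown!  # stops the loop and waits for the pool to terminate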
data/lib/core/jobs/producer.rb
DELETED
@@ -1,26 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-module Core
-  module Jobs
-    class Producer
-      JOB_TYPES = %i(sync).freeze
-
-      class << self
-        def enqueue_job(job_type:, connector_settings:)
-          raise UnsupportedJobType unless JOB_TYPES.include?(job_type)
-          raise ArgumentError unless connector_settings.kind_of?(ConnectorSettings)
-
-          ElasticConnectorActions.create_job(connector_settings: connector_settings)
-        end
-      end
-    end
-
-    class UnsupportedJobType < StandardError; end
-  end
-end
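Note: the removed Producer only supported :sync jobs. A hedged sketch of how a job would have been enqueued; connector_settings is assumed to be a Core::ConnectorSettings instance (anything else raises ArgumentError, and unknown job types raise Core::Jobs::UnsupportedJobType):

# Illustrative only: enqueueing a sync job with the removed Core::Jobs::Producer.
Core::Jobs::Producer.enqueue_job(
  job_type: :sync,
  connector_settings: connector_settings  # assumed Core::ConnectorSettings instance
)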
data/lib/utility/bulk_queue.rb
DELETED
@@ -1,85 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-require 'json'
-
-module Utility
-  class BulkQueue
-    class QueueOverflowError < StandardError; end
-
-    # 500 items or 5MB
-    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
-      @operation_count_threshold = operation_count_threshold.freeze
-      @size_threshold = size_threshold.freeze
-
-      @buffer = ''
-
-      @current_operation_count = 0
-
-      @current_buffer_size = 0
-      @current_data_size = 0
-    end
-
-    def pop_all
-      result = @buffer
-
-      reset
-
-      result
-    end
-
-    def add(operation, payload = nil)
-      raise QueueOverflowError unless will_fit?(operation, payload)
-
-      operation_size = get_size(operation)
-      payload_size = get_size(payload)
-
-      @current_operation_count += 1
-      @current_buffer_size += operation_size
-      @current_buffer_size += payload_size
-      @current_data_size += payload_size
-
-      @buffer << operation
-      @buffer << "\n"
-
-      if payload
-        @buffer << payload
-        @buffer << "\n"
-      end
-    end
-
-    def will_fit?(operation, payload = nil)
-      return false if @current_operation_count + 1 > @operation_count_threshold
-
-      operation_size = get_size(operation)
-      payload_size = get_size(payload)
-
-      @current_buffer_size + operation_size + payload_size < @size_threshold
-    end
-
-    def current_stats
-      {
-        :current_operation_count => @current_operation_count,
-        :current_buffer_size => @current_buffer_size
-      }
-    end
-
-    private
-
-    def get_size(str)
-      return 0 unless str
-      str.bytesize
-    end
-
-    def reset
-      @current_operation_count = 0
-      @current_buffer_size = 0
-      @current_data_size = 0
-
-      @buffer = ''
-    end
-  end
-end
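Note: the removed BulkQueue buffered Elasticsearch bulk operations as newline-delimited JSON until roughly 500 operations or 5 MB were queued. A hedged usage sketch with a made-up document and index name:

# Illustrative only: usage of the removed Utility::BulkQueue.
require 'json'

queue = Utility::BulkQueue.new  # defaults: 500 operations or 5 MB
doc = { 'id' => '1', 'title' => 'example' }                                # hypothetical document
op  = { 'index' => { '_index' => 'example-index', '_id' => '1' } }.to_json

queue.add(op, doc.to_json) if queue.will_fit?(op, doc.to_json)
queue.current_stats     # => { :current_operation_count => 1, :current_buffer_size => ... }
ndjson = queue.pop_all  # NDJSON payload for the Elasticsearch _bulk API; the queue resets itself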
data/lib/utility/error_monitor.rb
DELETED
@@ -1,108 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-require 'time'
-require 'utility/errors'
-require 'utility/exception_tracking'
-
-module Utility
-  class ErrorMonitor
-    class MonitoringError < StandardError
-      attr_accessor :tripped_by
-
-      def initialize(message = nil, tripped_by: nil)
-        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
-        @tripped_by = tripped_by
-      end
-    end
-
-    class MaxSuccessiveErrorsExceededError < MonitoringError; end
-    class MaxErrorsExceededError < MonitoringError; end
-    class MaxErrorsInWindowExceededError < MonitoringError; end
-
-    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
-
-    def initialize(
-      max_errors: 1000,
-      max_consecutive_errors: 10,
-      max_error_ratio: 0.15,
-      window_size: 100,
-      error_queue_size: 20
-    )
-      @max_errors = max_errors
-      @max_consecutive_errors = max_consecutive_errors
-      @max_error_ratio = max_error_ratio
-      @window_size = window_size
-      @total_error_count = 0
-      @success_count = 0
-      @consecutive_error_count = 0
-      @window_errors = Array.new(window_size) { false }
-      @window_index = 0
-      @last_error = nil
-      @error_queue_size = error_queue_size
-      @error_queue = []
-    end
-
-    def note_success
-      @consecutive_error_count = 0
-      @success_count += 1
-      increment_window_index
-    end
-
-    def note_error(error, id: Time.now.to_i)
-      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
-      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
-      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
-      @total_error_count += 1
-      @consecutive_error_count += 1
-      @window_errors[@window_index] = true
-      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
-      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
-      increment_window_index
-      @last_error = error
-
-      raise_if_necessary
-    end
-
-    def finalize
-      total_documents = @total_error_count + @success_count
-      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
-        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
-      end
-    end
-
-    private
-
-    def raise_if_necessary
-      error =
-        if @consecutive_error_count > @max_consecutive_errors
-          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
-        elsif @total_error_count > @max_errors
-          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
-        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
-          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
-        end
-
-      raise_with_last_cause(error) if error
-    end
-
-    def num_errors_in_window
-      @window_errors.count(&:itself).to_f
-    end
-
-    def increment_window_index
-      @window_index = (@window_index + 1) % @window_size
-    end
-
-    def raise_with_last_cause(error)
-      raise @last_error
-    rescue StandardError
-      raise error
-    end
-  end
-end
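Note: the removed ErrorMonitor raised once too many total, consecutive, or windowed errors were seen. A hedged sketch of the call pattern during ingestion; the documents enumerable and ingest call are hypothetical:

# Illustrative only: call pattern for the removed Utility::ErrorMonitor.
monitor = Utility::ErrorMonitor.new(max_consecutive_errors: 10, max_error_ratio: 0.15)

documents.each do |doc|   # hypothetical collection of documents being ingested
  ingest(doc)             # hypothetical ingestion call
  monitor.note_success
rescue StandardError => e
  monitor.note_error(e, id: doc['id'])  # raises a MonitoringError subclass once a limit is exceeded
end

monitor.finalize  # raises MaxErrorsInWindowExceededError if the overall error ratio is too high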
data/lib/utility/filtering.rb
DELETED
@@ -1,22 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-module Utility
-  class Filtering
-    class << self
-      def extract_filter(filtering)
-        return {} unless filtering.present?
-
-        # assume for now, that first object in filtering array or a filter object itself is the only filtering object
-        filter = filtering.is_a?(Array) ? filtering.first : filtering
-
-        filter.present? ? filter : {}
-      end
-    end
-  end
-end
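Note: the removed Utility::Filtering.extract_filter returned the first filter object, or an empty hash when nothing was present (#present? comes from ActiveSupport). A hedged sketch of its behaviour:

# Illustrative only: expected results of the removed Utility::Filtering.extract_filter.
Utility::Filtering.extract_filter(nil)                      # => {}
Utility::Filtering.extract_filter([])                       # => {}
Utility::Filtering.extract_filter({ 'rules' => [] })        # => { 'rules' => [] }
Utility::Filtering.extract_filter([{ 'rules' => [] }, {}])  # => { 'rules' => [] } (first element)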