connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +0 -4
- data/lib/app/dispatcher.rb +17 -42
- data/lib/app/preflight_check.rb +0 -11
- data/lib/connectors/base/connector.rb +14 -43
- data/lib/connectors/example/connector.rb +0 -6
- data/lib/connectors/gitlab/connector.rb +1 -6
- data/lib/connectors/mongodb/connector.rb +43 -47
- data/lib/connectors/sync_status.rb +1 -6
- data/lib/core/configuration.rb +1 -3
- data/lib/core/connector_settings.rb +16 -52
- data/lib/core/elastic_connector_actions.rb +59 -320
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
- data/lib/core/scheduler.rb +10 -40
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +16 -72
- data/lib/core.rb +0 -4
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/errors.rb +12 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +4 -11
- metadata +9 -27
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
- data/lib/connectors/tolerable_error_helper.rb +0 -43
- data/lib/core/connector_job.rb +0 -210
- data/lib/core/filtering/post_process_engine.rb +0 -39
- data/lib/core/filtering/post_process_result.rb +0 -27
- data/lib/core/filtering/simple_rule.rb +0 -141
- data/lib/core/filtering/validation_job_runner.rb +0 -53
- data/lib/core/filtering/validation_status.rb +0 -17
- data/lib/core/filtering.rb +0 -17
- data/lib/core/ingestion/es_sink.rb +0 -118
- data/lib/core/jobs/consumer.rb +0 -114
- data/lib/core/jobs/producer.rb +0 -26
- data/lib/utility/bulk_queue.rb +0 -85
- data/lib/utility/error_monitor.rb +0 -108
- data/lib/utility/filtering.rb +0 -22
data/lib/core/jobs/consumer.rb
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module Jobs
|
11
|
-
class Consumer
|
12
|
-
def initialize(scheduler:, poll_interval: 3, termination_timeout: 60, min_threads: 1, max_threads: 5, max_queue: 100, idle_time: 5)
|
13
|
-
@scheduler = scheduler
|
14
|
-
@poll_interval = poll_interval
|
15
|
-
@termination_timeout = termination_timeout
|
16
|
-
@min_threads = min_threads
|
17
|
-
@max_threads = max_threads
|
18
|
-
@max_queue = max_queue
|
19
|
-
@idle_time = idle_time
|
20
|
-
|
21
|
-
@running = Concurrent::AtomicBoolean.new(false)
|
22
|
-
end
|
23
|
-
|
24
|
-
def subscribe!(index_name:)
|
25
|
-
@index_name = index_name
|
26
|
-
|
27
|
-
start_loop!
|
28
|
-
end
|
29
|
-
|
30
|
-
def running?
|
31
|
-
# @TODO check if a loop thread is alive
|
32
|
-
pool.running? && @running.true?
|
33
|
-
end
|
34
|
-
|
35
|
-
def shutdown!
|
36
|
-
Utility::Logger.info("Shutting down consumer for #{@index_name} index")
|
37
|
-
@running.make_false
|
38
|
-
pool.shutdown
|
39
|
-
pool.wait_for_termination(@termination_timeout)
|
40
|
-
# reset pool
|
41
|
-
@pool = nil
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
def start_loop!
|
47
|
-
Utility::Logger.info("Starting a new consumer for #{@index_name} index")
|
48
|
-
|
49
|
-
Thread.new do
|
50
|
-
# assign a name to the thread
|
51
|
-
# see @TODO in #self.running?
|
52
|
-
Thread.current[:name] = "consumer-group-#{@index_name}"
|
53
|
-
|
54
|
-
loop do
|
55
|
-
if @running.false?
|
56
|
-
Utility::Logger.info('Shutting down the loop')
|
57
|
-
break
|
58
|
-
end
|
59
|
-
|
60
|
-
sleep(@poll_interval)
|
61
|
-
Utility::Logger.debug('Getting registered connectors')
|
62
|
-
|
63
|
-
connectors = ready_for_sync_connectors
|
64
|
-
next unless connectors.any?
|
65
|
-
|
66
|
-
Utility::Logger.debug("Number of available connectors: #{connectors.size}")
|
67
|
-
|
68
|
-
# @TODO It is assumed that @index_name is used to retrive pending jobs.
|
69
|
-
# This will be discussed after 8.6 release
|
70
|
-
pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
|
71
|
-
Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
|
72
|
-
|
73
|
-
pending_jobs.each do |job|
|
74
|
-
connector_settings = connectors[job.connector_id]
|
75
|
-
|
76
|
-
pool.post do
|
77
|
-
Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
|
78
|
-
Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
|
79
|
-
job_runner = Core::SyncJobRunner.new(connector_settings, job)
|
80
|
-
job_runner.execute
|
81
|
-
rescue Core::JobAlreadyRunningError
|
82
|
-
Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
|
83
|
-
rescue Core::ConnectorVersionChangedError => e
|
84
|
-
Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
|
85
|
-
rescue StandardError => e
|
86
|
-
Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
|
87
|
-
end
|
88
|
-
end
|
89
|
-
rescue StandardError => e
|
90
|
-
Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
@running.make_true
|
95
|
-
end
|
96
|
-
|
97
|
-
def pool
|
98
|
-
@pool ||= Concurrent::ThreadPoolExecutor.new(
|
99
|
-
min_threads: @min_threads,
|
100
|
-
max_threads: @max_threads,
|
101
|
-
max_queue: @max_queue,
|
102
|
-
fallback_policy: :abort,
|
103
|
-
idletime: @idle_time
|
104
|
-
)
|
105
|
-
end
|
106
|
-
|
107
|
-
def ready_for_sync_connectors
|
108
|
-
@scheduler.connector_settings
|
109
|
-
.select(&:ready_for_sync?)
|
110
|
-
.inject({}) { |memo, cs| memo.merge(cs.id => cs) }
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
data/lib/core/jobs/producer.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module Jobs
|
11
|
-
class Producer
|
12
|
-
JOB_TYPES = %i(sync).freeze
|
13
|
-
|
14
|
-
class << self
|
15
|
-
def enqueue_job(job_type:, connector_settings:)
|
16
|
-
raise UnsupportedJobType unless JOB_TYPES.include?(job_type)
|
17
|
-
raise ArgumentError unless connector_settings.kind_of?(ConnectorSettings)
|
18
|
-
|
19
|
-
ElasticConnectorActions.create_job(connector_settings: connector_settings)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class UnsupportedJobType < StandardError; end
|
25
|
-
end
|
26
|
-
end
|
data/lib/utility/bulk_queue.rb
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
require 'json'
|
8
|
-
|
9
|
-
module Utility
|
10
|
-
class BulkQueue
|
11
|
-
class QueueOverflowError < StandardError; end
|
12
|
-
|
13
|
-
# 500 items or 5MB
|
14
|
-
def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
|
15
|
-
@operation_count_threshold = operation_count_threshold.freeze
|
16
|
-
@size_threshold = size_threshold.freeze
|
17
|
-
|
18
|
-
@buffer = ''
|
19
|
-
|
20
|
-
@current_operation_count = 0
|
21
|
-
|
22
|
-
@current_buffer_size = 0
|
23
|
-
@current_data_size = 0
|
24
|
-
end
|
25
|
-
|
26
|
-
def pop_all
|
27
|
-
result = @buffer
|
28
|
-
|
29
|
-
reset
|
30
|
-
|
31
|
-
result
|
32
|
-
end
|
33
|
-
|
34
|
-
def add(operation, payload = nil)
|
35
|
-
raise QueueOverflowError unless will_fit?(operation, payload)
|
36
|
-
|
37
|
-
operation_size = get_size(operation)
|
38
|
-
payload_size = get_size(payload)
|
39
|
-
|
40
|
-
@current_operation_count += 1
|
41
|
-
@current_buffer_size += operation_size
|
42
|
-
@current_buffer_size += payload_size
|
43
|
-
@current_data_size += payload_size
|
44
|
-
|
45
|
-
@buffer << operation
|
46
|
-
@buffer << "\n"
|
47
|
-
|
48
|
-
if payload
|
49
|
-
@buffer << payload
|
50
|
-
@buffer << "\n"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def will_fit?(operation, payload = nil)
|
55
|
-
return false if @current_operation_count + 1 > @operation_count_threshold
|
56
|
-
|
57
|
-
operation_size = get_size(operation)
|
58
|
-
payload_size = get_size(payload)
|
59
|
-
|
60
|
-
@current_buffer_size + operation_size + payload_size < @size_threshold
|
61
|
-
end
|
62
|
-
|
63
|
-
def current_stats
|
64
|
-
{
|
65
|
-
:current_operation_count => @current_operation_count,
|
66
|
-
:current_buffer_size => @current_buffer_size
|
67
|
-
}
|
68
|
-
end
|
69
|
-
|
70
|
-
private
|
71
|
-
|
72
|
-
def get_size(str)
|
73
|
-
return 0 unless str
|
74
|
-
str.bytesize
|
75
|
-
end
|
76
|
-
|
77
|
-
def reset
|
78
|
-
@current_operation_count = 0
|
79
|
-
@current_buffer_size = 0
|
80
|
-
@current_data_size = 0
|
81
|
-
|
82
|
-
@buffer = ''
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
data/lib/utility/error_monitor.rb
DELETED
@@ -1,108 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'time'
|
10
|
-
require 'utility/errors'
|
11
|
-
require 'utility/exception_tracking'
|
12
|
-
|
13
|
-
module Utility
|
14
|
-
class ErrorMonitor
|
15
|
-
class MonitoringError < StandardError
|
16
|
-
attr_accessor :tripped_by
|
17
|
-
|
18
|
-
def initialize(message = nil, tripped_by: nil)
|
19
|
-
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
20
|
-
@tripped_by = tripped_by
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
25
|
-
class MaxErrorsExceededError < MonitoringError; end
|
26
|
-
class MaxErrorsInWindowExceededError < MonitoringError; end
|
27
|
-
|
28
|
-
attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
|
29
|
-
|
30
|
-
def initialize(
|
31
|
-
max_errors: 1000,
|
32
|
-
max_consecutive_errors: 10,
|
33
|
-
max_error_ratio: 0.15,
|
34
|
-
window_size: 100,
|
35
|
-
error_queue_size: 20
|
36
|
-
)
|
37
|
-
@max_errors = max_errors
|
38
|
-
@max_consecutive_errors = max_consecutive_errors
|
39
|
-
@max_error_ratio = max_error_ratio
|
40
|
-
@window_size = window_size
|
41
|
-
@total_error_count = 0
|
42
|
-
@success_count = 0
|
43
|
-
@consecutive_error_count = 0
|
44
|
-
@window_errors = Array.new(window_size) { false }
|
45
|
-
@window_index = 0
|
46
|
-
@last_error = nil
|
47
|
-
@error_queue_size = error_queue_size
|
48
|
-
@error_queue = []
|
49
|
-
end
|
50
|
-
|
51
|
-
def note_success
|
52
|
-
@consecutive_error_count = 0
|
53
|
-
@success_count += 1
|
54
|
-
increment_window_index
|
55
|
-
end
|
56
|
-
|
57
|
-
def note_error(error, id: Time.now.to_i)
|
58
|
-
stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
|
59
|
-
error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
|
60
|
-
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
|
-
@total_error_count += 1
|
62
|
-
@consecutive_error_count += 1
|
63
|
-
@window_errors[@window_index] = true
|
64
|
-
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
65
|
-
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
66
|
-
increment_window_index
|
67
|
-
@last_error = error
|
68
|
-
|
69
|
-
raise_if_necessary
|
70
|
-
end
|
71
|
-
|
72
|
-
def finalize
|
73
|
-
total_documents = @total_error_count + @success_count
|
74
|
-
if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
|
75
|
-
raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
private
|
80
|
-
|
81
|
-
def raise_if_necessary
|
82
|
-
error =
|
83
|
-
if @consecutive_error_count > @max_consecutive_errors
|
84
|
-
MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
|
85
|
-
elsif @total_error_count > @max_errors
|
86
|
-
MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
|
87
|
-
elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
|
88
|
-
MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
|
89
|
-
end
|
90
|
-
|
91
|
-
raise_with_last_cause(error) if error
|
92
|
-
end
|
93
|
-
|
94
|
-
def num_errors_in_window
|
95
|
-
@window_errors.count(&:itself).to_f
|
96
|
-
end
|
97
|
-
|
98
|
-
def increment_window_index
|
99
|
-
@window_index = (@window_index + 1) % @window_size
|
100
|
-
end
|
101
|
-
|
102
|
-
def raise_with_last_cause(error)
|
103
|
-
raise @last_error
|
104
|
-
rescue StandardError
|
105
|
-
raise error
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
data/lib/utility/filtering.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Utility
|
10
|
-
class Filtering
|
11
|
-
class << self
|
12
|
-
def extract_filter(filtering)
|
13
|
-
return {} unless filtering.present?
|
14
|
-
|
15
|
-
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
16
|
-
filter = filtering.is_a?(Array) ? filtering.first : filtering
|
17
|
-
|
18
|
-
filter.present? ? filter : {}
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|