connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -8
- data/lib/app/config.rb +6 -1
- data/lib/app/console_app.rb +1 -1
- data/lib/app/dispatcher.rb +18 -3
- data/lib/connectors/base/connector.rb +39 -22
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/connectors/example/connector.rb +2 -2
- data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
- data/lib/connectors/gitlab/connector.rb +4 -4
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
- data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
- data/lib/connectors/mongodb/connector.rb +66 -56
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
- data/lib/connectors/registry.rb +1 -1
- data/lib/connectors/tolerable_error_helper.rb +5 -1
- data/lib/connectors_utility.rb +6 -3
- data/lib/core/configuration.rb +13 -1
- data/lib/core/connector_job.rb +48 -7
- data/lib/core/connector_settings.rb +52 -20
- data/lib/core/elastic_connector_actions.rb +54 -38
- data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
- data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
- data/lib/core/filtering/filter_validator.rb +103 -0
- data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
- data/lib/core/filtering/post_process_engine.rb +2 -2
- data/lib/core/filtering/processing_stage.rb +20 -0
- data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
- data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
- data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
- data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
- data/lib/core/filtering/transform/filter_transformer.rb +26 -0
- data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
- data/lib/core/filtering/transform/transformation_target.rb +10 -0
- data/lib/core/filtering/validation_job_runner.rb +1 -3
- data/lib/core/filtering.rb +5 -3
- data/lib/core/job_cleanup.rb +66 -0
- data/lib/core/jobs/consumer.rb +62 -64
- data/lib/core/jobs/producer.rb +3 -0
- data/lib/core/scheduler.rb +67 -52
- data/lib/core/sync_job_runner.rb +170 -83
- data/lib/core.rb +1 -0
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/error_monitor.rb +26 -5
- data/lib/utility/es_client.rb +4 -0
- data/lib/utility/filtering.rb +4 -0
- metadata +32 -21
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
+
require 'connectors/sync_status'
|
11
12
|
require 'core/filtering/post_process_engine'
|
12
13
|
require 'core/ingestion'
|
13
14
|
require 'core/filtering/validation_status'
|
@@ -20,13 +21,42 @@ module Core
|
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
24
|
+
class ConnectorNotFoundError < StandardError
|
25
|
+
def initialize(connector_id)
|
26
|
+
super("Connector is not found for connector ID '#{connector_id}'.")
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class ConnectorJobNotFoundError < StandardError
|
31
|
+
def initialize(job_id)
|
32
|
+
super("Connector job is not found for job ID '#{job_id}'.")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class ConnectorJobCanceledError < StandardError
|
37
|
+
def initialize(job_id)
|
38
|
+
super("Connector job (ID: '#{job_id}') is requested to be canceled.")
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
class ConnectorJobNotRunningError < StandardError
|
43
|
+
def initialize(job_id, status)
|
44
|
+
super("Connector job (ID: '#{job_id}') is not running but in status of '#{status}'.")
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
23
48
|
class SyncJobRunner
|
24
49
|
JOB_REPORTING_INTERVAL = 10
|
25
50
|
|
26
51
|
def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
|
27
52
|
@connector_settings = connector_settings
|
53
|
+
@connector_id = connector_settings.id
|
54
|
+
@index_name = job.index_name
|
55
|
+
@service_type = job.service_type
|
56
|
+
@job = job
|
57
|
+
@job_id = job.id
|
28
58
|
@sink = Core::Ingestion::EsSink.new(
|
29
|
-
|
59
|
+
@index_name,
|
30
60
|
@connector_settings.request_pipeline,
|
31
61
|
Utility::BulkQueue.new(
|
32
62
|
max_ingestion_queue_size,
|
@@ -34,16 +64,7 @@ module Core
|
|
34
64
|
),
|
35
65
|
max_ingestion_queue_bytes
|
36
66
|
)
|
37
|
-
@connector_class = Connectors::REGISTRY.connector_class(
|
38
|
-
@sync_finished = false
|
39
|
-
@sync_error = nil
|
40
|
-
@status = {
|
41
|
-
:indexed_document_count => 0,
|
42
|
-
:deleted_document_count => 0,
|
43
|
-
:indexed_document_volume => 0,
|
44
|
-
:error => nil
|
45
|
-
}
|
46
|
-
@job = job
|
67
|
+
@connector_class = Connectors::REGISTRY.connector_class(@service_type)
|
47
68
|
end
|
48
69
|
|
49
70
|
def execute
|
@@ -54,134 +75,200 @@ module Core
|
|
54
75
|
private
|
55
76
|
|
56
77
|
def do_sync!
|
57
|
-
|
58
|
-
|
59
|
-
# connector service doesn't support multiple jobs running simultaneously
|
60
|
-
raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
|
61
|
-
|
62
|
-
Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
|
63
|
-
|
64
|
-
# claim the job
|
65
|
-
@job.make_running!
|
66
|
-
|
67
|
-
job_description = @job.es_source
|
68
|
-
job_id = @job.id
|
69
|
-
job_description['_id'] = job_id
|
70
|
-
|
71
|
-
unless job_id.present?
|
72
|
-
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
73
|
-
return
|
74
|
-
end
|
78
|
+
return unless claim_job!
|
75
79
|
|
76
80
|
begin
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
81
|
+
# We want to validate advanced filtering rules even if basic rules are disabled
|
82
|
+
if @connector_settings.any_filtering_feature_enabled?
|
83
|
+
Utility::Logger.info("Checking active filtering for sync job #{@job_id} for connector #{@connector_id}.")
|
84
|
+
validate_filtering(@job.filtering)
|
85
|
+
Utility::Logger.debug("Active filtering for sync job #{@job_id} for connector #{@connector_id} is valid.")
|
86
|
+
end
|
82
87
|
|
83
|
-
connector_instance = Connectors::REGISTRY.connector(@
|
88
|
+
@connector_instance = Connectors::REGISTRY.connector(@service_type, @connector_settings.configuration, job_description: @job)
|
89
|
+
@connector_instance.do_health_check!
|
84
90
|
|
85
|
-
|
91
|
+
@sync_status = nil
|
92
|
+
@sync_error = nil
|
93
|
+
@reporting_cycle_start = Time.now
|
86
94
|
|
87
95
|
incoming_ids = []
|
88
|
-
existing_ids = ElasticConnectorActions.fetch_document_ids(@
|
89
|
-
|
90
|
-
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
91
|
-
|
92
|
-
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
93
|
-
reporting_cycle_start = Time.now
|
94
|
-
Utility::Logger.info('Yielding documents...')
|
95
|
-
connector_instance.yield_documents do |document|
|
96
|
-
document = add_ingest_metadata(document)
|
97
|
-
post_process_result = post_processing_engine.process(document)
|
98
|
-
if post_process_result.is_include?
|
99
|
-
@sink.ingest(document)
|
100
|
-
incoming_ids << document['id']
|
101
|
-
end
|
96
|
+
existing_ids = ElasticConnectorActions.fetch_document_ids(@index_name)
|
102
97
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
98
|
+
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@index_name}.")
|
99
|
+
|
100
|
+
post_processing_engine = @connector_settings.filtering_rule_feature_enabled? ? Core::Filtering::PostProcessEngine.new(@job.filtering) : nil
|
101
|
+
|
102
|
+
yield_docs do |document|
|
103
|
+
next if post_processing_engine && !post_processing_engine.process(document).is_include?
|
104
|
+
@sink.ingest(document)
|
105
|
+
incoming_ids << document['id']
|
107
106
|
end
|
108
107
|
|
109
108
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
110
109
|
|
111
|
-
Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@
|
110
|
+
Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@index_name}.")
|
112
111
|
|
113
112
|
ids_to_delete.each do |id|
|
114
113
|
@sink.delete(id)
|
115
114
|
|
116
|
-
|
117
|
-
|
118
|
-
|
115
|
+
periodically do
|
116
|
+
check_job
|
117
|
+
@job.update_metadata(@sink.ingestion_stats, @connector_instance.metadata)
|
119
118
|
end
|
120
119
|
end
|
121
120
|
|
122
121
|
@sink.flush
|
123
122
|
|
123
|
+
# force check at the end
|
124
|
+
check_job
|
125
|
+
|
124
126
|
# We use this mechanism to check whether an interrupt (or something else that kept the thread from finishing)
|
125
127
|
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
126
|
-
@
|
128
|
+
@sync_status = Connectors::SyncStatus::COMPLETED
|
129
|
+
@sync_error = nil
|
130
|
+
rescue ConnectorNotFoundError, ConnectorJobNotFoundError, ConnectorJobNotRunningError => e
|
131
|
+
Utility::Logger.error(e.message)
|
132
|
+
@sync_status = Connectors::SyncStatus::ERROR
|
133
|
+
@sync_error = e.message
|
134
|
+
rescue ConnectorJobCanceledError => e
|
135
|
+
Utility::Logger.error(e.message)
|
136
|
+
@sync_status = Connectors::SyncStatus::CANCELED
|
137
|
+
# Cancelation is an expected action and we shouldn't log an error
|
138
|
+
@sync_error = nil
|
127
139
|
rescue StandardError => e
|
140
|
+
@sync_status = Connectors::SyncStatus::ERROR
|
128
141
|
@sync_error = e.message
|
129
142
|
Utility::ExceptionTracking.log_exception(e)
|
130
143
|
ensure
|
131
144
|
stats = @sink.ingestion_stats
|
132
145
|
|
133
146
|
Utility::Logger.debug("Sync stats are: #{stats}")
|
134
|
-
|
135
|
-
|
136
|
-
@status[:deleted_document_count] = stats[:deleted_document_count]
|
137
|
-
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
138
|
-
|
139
|
-
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
140
|
-
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
147
|
+
Utility::Logger.info("Upserted #{stats[:indexed_document_count]} documents into #{@index_name}.")
|
148
|
+
Utility::Logger.info("Deleted #{stats[:deleted_document_count]} documents into #{@index_name}.")
|
141
149
|
|
142
150
|
# Make sure to not override a previous error message
|
143
|
-
|
144
|
-
|
151
|
+
@sync_status ||= Connectors::SyncStatus::ERROR
|
152
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.' if @sync_status == Connectors::SyncStatus::ERROR && @sync_error.nil?
|
153
|
+
|
154
|
+
# update job if it's still present
|
155
|
+
if reload_job!
|
156
|
+
case @sync_status
|
157
|
+
when Connectors::SyncStatus::COMPLETED
|
158
|
+
@job.done!(stats, @connector_instance&.metadata)
|
159
|
+
when Connectors::SyncStatus::CANCELED
|
160
|
+
@job.cancel!(stats, @connector_instance&.metadata)
|
161
|
+
when Connectors::SyncStatus::ERROR
|
162
|
+
@job.error!(@sync_error, stats, @connector_instance&.metadata)
|
163
|
+
else
|
164
|
+
Utility::Logger.error("The job is supposed to be in one of the terminal statuses (#{Connectors::SyncStatus::TERMINAL_STATUSES.join(', ')}), but it's #{@sync_status}")
|
165
|
+
@sync_status = Connectors::SyncStatus::ERROR
|
166
|
+
@sync_error = 'The job is not ended as expected for unknown reason'
|
167
|
+
@job.error!(@sync_error, stats, @connector_instance&.metadata)
|
168
|
+
end
|
169
|
+
# need to reload the job to get the latest job status
|
170
|
+
reload_job!
|
145
171
|
end
|
146
172
|
|
147
|
-
|
148
|
-
|
149
|
-
|
173
|
+
# update connector if it's still present
|
174
|
+
if reload_connector!
|
175
|
+
@connector_settings.update_last_sync!(@job)
|
150
176
|
end
|
151
177
|
|
152
|
-
|
178
|
+
Utility::Logger.info("Completed the job (ID: #{@job_id}) with status: #{@sync_status}#{@sync_error ? " and error: #{@sync_error}" : ''}")
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def yield_docs
|
183
|
+
@connector_instance.yield_documents do |document|
|
184
|
+
document = add_ingest_metadata(document)
|
153
185
|
|
154
|
-
if
|
155
|
-
|
156
|
-
|
157
|
-
|
186
|
+
yield(document) if block_given?
|
187
|
+
|
188
|
+
periodically do
|
189
|
+
check_job
|
190
|
+
@job.update_metadata(@sink.ingestion_stats, @connector_instance.metadata)
|
158
191
|
end
|
159
192
|
end
|
160
193
|
end
|
161
194
|
|
195
|
+
def claim_job!
|
196
|
+
Utility::Logger.info("Claiming job (ID: #{@job_id}) for connector (ID: #{@connector_id}).")
|
197
|
+
|
198
|
+
# connector service doesn't support multiple jobs running simultaneously
|
199
|
+
if @connector_settings.running?
|
200
|
+
Utility::Logger.warn("Failed to claim job (ID: #{@job_id}) for connector (ID: #{@connector_id}), there are already jobs running.")
|
201
|
+
return false
|
202
|
+
end
|
203
|
+
|
204
|
+
begin
|
205
|
+
Core::ElasticConnectorActions.update_connector_sync_start(@connector_id)
|
206
|
+
|
207
|
+
@job.make_running!
|
208
|
+
|
209
|
+
Utility::Logger.info("Successfully claimed job (ID: #{@job_id}) for connector (ID: #{@connector_id}).")
|
210
|
+
true
|
211
|
+
rescue StandardError => e
|
212
|
+
Utility::ExceptionTracking.log_exception(e)
|
213
|
+
Utility::Logger.error("Failed to claim job (ID: #{@job_id}) for connector (ID: #{@connector_id}). Please check the logs for the cause of this error.")
|
214
|
+
false
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
162
218
|
def add_ingest_metadata(document)
|
219
|
+
return document unless @job
|
163
220
|
document.tap do |it|
|
164
|
-
it['_extract_binary_content'] = @
|
165
|
-
it['_reduce_whitespace'] = @
|
166
|
-
it['_run_ml_inference'] = @
|
221
|
+
it['_extract_binary_content'] = @job.extract_binary_content? if @job.extract_binary_content?
|
222
|
+
it['_reduce_whitespace'] = @job.reduce_whitespace? if @job.reduce_whitespace?
|
223
|
+
it['_run_ml_inference'] = @job.run_ml_inference? if @job.run_ml_inference?
|
167
224
|
end
|
168
225
|
end
|
169
226
|
|
170
227
|
def validate_configuration!
|
171
228
|
expected_fields = @connector_class.configurable_fields.keys.map(&:to_s).sort
|
172
|
-
actual_fields = @
|
229
|
+
actual_fields = @job.configuration.keys.map(&:to_s).sort
|
173
230
|
|
174
|
-
raise IncompatibleConfigurableFieldsError.new(@
|
231
|
+
raise IncompatibleConfigurableFieldsError.new(@service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
175
232
|
end
|
176
233
|
|
177
234
|
def validate_filtering(filtering)
|
178
235
|
validation_result = @connector_class.validate_filtering(filtering)
|
179
236
|
|
180
|
-
wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@
|
237
|
+
wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_id}. Please check active filtering in connectors index.")
|
181
238
|
raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
|
182
239
|
|
183
|
-
errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@
|
240
|
+
errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_id}. Please check active filtering in connectors index.")
|
184
241
|
raise errors_present_error if validation_result[:errors].present?
|
185
242
|
end
|
243
|
+
|
244
|
+
def periodically
|
245
|
+
return if Time.now - @reporting_cycle_start < JOB_REPORTING_INTERVAL
|
246
|
+
|
247
|
+
yield if block_given?
|
248
|
+
|
249
|
+
@reporting_cycle_start = Time.now
|
250
|
+
end
|
251
|
+
|
252
|
+
def check_job
|
253
|
+
# raise error if the connector is deleted
|
254
|
+
raise ConnectorNotFoundError.new(@connector_id) unless reload_connector!
|
255
|
+
|
256
|
+
# raise error if the job is deleted
|
257
|
+
raise ConnectorJobNotFoundError.new(@job_id) unless reload_job!
|
258
|
+
|
259
|
+
# raise error if the job is canceled
|
260
|
+
raise ConnectorJobCanceledError.new(@job_id) if @job.canceling?
|
261
|
+
|
262
|
+
# raise error if the job is not in the status in_progress
|
263
|
+
raise ConnectorJobNotRunningError.new(@job_id, @job.status) unless @job.in_progress?
|
264
|
+
end
|
265
|
+
|
266
|
+
def reload_job!
|
267
|
+
@job = ConnectorJob.fetch_by_id(@job_id)
|
268
|
+
end
|
269
|
+
|
270
|
+
def reload_connector!
|
271
|
+
@connector_settings = ConnectorSettings.fetch_by_id(@connector_id)
|
272
|
+
end
|
186
273
|
end
|
187
274
|
end
|
data/lib/core.rb
CHANGED
data/lib/utility/bulk_queue.rb
CHANGED
@@ -13,7 +13,7 @@ module Utility
|
|
13
13
|
class QueueOverflowError < StandardError; end
|
14
14
|
|
15
15
|
# 500 items or 5MB
|
16
|
-
def initialize(operation_count_threshold = Utility::Constants::
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
17
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
18
18
|
@size_threshold = size_threshold.freeze
|
19
19
|
|
data/lib/utility/constants.rb
CHANGED
@@ -16,8 +16,6 @@ module Utility
|
|
16
16
|
JOB_INDEX = '.elastic-connectors-sync-jobs'
|
17
17
|
CONTENT_INDEX_PREFIX = 'search-'
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
|
-
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
|
-
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
19
|
|
22
20
|
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
21
|
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
@@ -51,7 +51,7 @@ module Utility
|
|
51
51
|
def note_success
|
52
52
|
@consecutive_error_count = 0
|
53
53
|
@success_count += 1
|
54
|
-
|
54
|
+
track_window_error(false)
|
55
55
|
end
|
56
56
|
|
57
57
|
def note_error(error, id: Time.now.to_i)
|
@@ -60,10 +60,9 @@ module Utility
|
|
60
60
|
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
61
|
@total_error_count += 1
|
62
62
|
@consecutive_error_count += 1
|
63
|
-
@window_errors[@window_index] = true
|
64
63
|
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
65
64
|
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
66
|
-
|
65
|
+
track_window_error(true)
|
67
66
|
@last_error = error
|
68
67
|
|
69
68
|
raise_if_necessary
|
@@ -92,10 +91,32 @@ module Utility
|
|
92
91
|
end
|
93
92
|
|
94
93
|
def num_errors_in_window
|
95
|
-
@window_errors.count(
|
94
|
+
@window_errors.count(true).to_f
|
96
95
|
end
|
97
96
|
|
98
|
-
def
|
97
|
+
def track_window_error(is_error)
|
98
|
+
# We keep the errors array at a fixed size of @window_size. For example, imagine @window_size = 5.
|
99
|
+
# Error array inits as falses:
|
100
|
+
# [ false, false, false, false, false ]
|
101
|
+
# Third document raises an error:
|
102
|
+
# [ false, false, true, false, false ]
|
103
|
+
# ^^^^
|
104
|
+
# 2 % 5 == 2
|
105
|
+
# Fifth document raises an error:
|
106
|
+
# [ false, false, true, false, true ]
|
107
|
+
# ^^^^
|
108
|
+
# 4 % 5 == 4
|
109
|
+
# Sixth document raises an error:
|
110
|
+
# [ true, false, true, false, true ]
|
111
|
+
# ^^^^
|
112
|
+
# 5 % 5 == 0
|
113
|
+
#
|
114
|
+
# Eighth document is successful:
|
115
|
+
# [ true, false, false, false, true ]
|
116
|
+
# ^^^^^
|
117
|
+
# 7 % 5 == 2
|
118
|
+
# And so on.
|
119
|
+
@window_errors[@window_index] = is_error
|
99
120
|
@window_index = (@window_index + 1) % @window_size
|
100
121
|
end
|
101
122
|
|
data/lib/utility/es_client.rb
CHANGED
@@ -43,6 +43,10 @@ module Utility
|
|
43
43
|
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
44
|
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
45
|
|
46
|
+
# headers
|
47
|
+
# these are necessary for cloud-hosted native connectors
|
48
|
+
configs[:headers] = es_config[:headers].to_h if es_config[:headers]
|
49
|
+
|
46
50
|
# if log or trace is activated, we use the application logger
|
47
51
|
configs[:logger] = if configs[:log] || configs[:trace]
|
48
52
|
Utility::Logger.logger
|
data/lib/utility/filtering.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.
|
4
|
+
version: 8.11.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 6.1.7.3
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 6.1.7.3
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: attr_extras
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -198,14 +198,14 @@ dependencies:
|
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 8.
|
201
|
+
version: 8.8.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 8.
|
208
|
+
version: 8.8.0
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: faraday
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -350,16 +350,16 @@ dependencies:
|
|
350
350
|
name: tzinfo
|
351
351
|
requirement: !ruby/object:Gem::Requirement
|
352
352
|
requirements:
|
353
|
-
- - "
|
353
|
+
- - "~>"
|
354
354
|
- !ruby/object:Gem::Version
|
355
|
-
version: '0'
|
355
|
+
version: '2.0'
|
356
356
|
type: :runtime
|
357
357
|
prerelease: false
|
358
358
|
version_requirements: !ruby/object:Gem::Requirement
|
359
359
|
requirements:
|
360
|
-
- - "
|
360
|
+
- - "~>"
|
361
361
|
- !ruby/object:Gem::Version
|
362
|
-
version: '0'
|
362
|
+
version: '2.0'
|
363
363
|
- !ruby/object:Gem::Dependency
|
364
364
|
name: tzinfo-data
|
365
365
|
requirement: !ruby/object:Gem::Requirement
|
@@ -396,11 +396,8 @@ files:
|
|
396
396
|
- lib/app/version.rb
|
397
397
|
- lib/connectors.rb
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
|
-
- lib/connectors/base/advanced_snippet_against_schema_validator.rb
|
400
|
-
- lib/connectors/base/advanced_snippet_validator.rb
|
401
399
|
- lib/connectors/base/connector.rb
|
402
400
|
- lib/connectors/base/custom_client.rb
|
403
|
-
- lib/connectors/base/simple_rules_parser.rb
|
404
401
|
- lib/connectors/connector_status.rb
|
405
402
|
- lib/connectors/crawler/scheduler.rb
|
406
403
|
- lib/connectors/example/attachments/first_attachment.txt
|
@@ -413,14 +410,14 @@ files:
|
|
413
410
|
- lib/connectors/gitlab/custom_client.rb
|
414
411
|
- lib/connectors/gitlab/extractor.rb
|
415
412
|
- lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
|
413
|
+
- lib/connectors/job_trigger_method.rb
|
416
414
|
- lib/connectors/mongodb/connector.rb
|
417
415
|
- lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
|
418
416
|
- lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
|
419
|
-
- lib/connectors/mongodb/
|
417
|
+
- lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb
|
420
418
|
- lib/connectors/registry.rb
|
421
419
|
- lib/connectors/sync_status.rb
|
422
420
|
- lib/connectors/tolerable_error_helper.rb
|
423
|
-
- lib/connectors_app/\
|
424
421
|
- lib/connectors_service.rb
|
425
422
|
- lib/connectors_utility.rb
|
426
423
|
- lib/core.rb
|
@@ -429,14 +426,28 @@ files:
|
|
429
426
|
- lib/core/connector_settings.rb
|
430
427
|
- lib/core/elastic_connector_actions.rb
|
431
428
|
- lib/core/filtering.rb
|
429
|
+
- lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb
|
430
|
+
- lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb
|
431
|
+
- lib/core/filtering/filter_validator.rb
|
432
|
+
- lib/core/filtering/hash_against_schema_validator.rb
|
432
433
|
- lib/core/filtering/post_process_engine.rb
|
433
434
|
- lib/core/filtering/post_process_result.rb
|
434
|
-
- lib/core/filtering/
|
435
|
+
- lib/core/filtering/processing_stage.rb
|
436
|
+
- lib/core/filtering/simple_rules/simple_rule.rb
|
437
|
+
- lib/core/filtering/simple_rules/simple_rules_parser.rb
|
438
|
+
- lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb
|
439
|
+
- lib/core/filtering/simple_rules/validation/simple_rules_schema.rb
|
440
|
+
- lib/core/filtering/simple_rules/validation/simple_rules_validator.rb
|
441
|
+
- lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb
|
442
|
+
- lib/core/filtering/transform/filter_transformer.rb
|
443
|
+
- lib/core/filtering/transform/filter_transformer_facade.rb
|
444
|
+
- lib/core/filtering/transform/transformation_target.rb
|
435
445
|
- lib/core/filtering/validation_job_runner.rb
|
436
446
|
- lib/core/filtering/validation_status.rb
|
437
447
|
- lib/core/heartbeat.rb
|
438
448
|
- lib/core/ingestion.rb
|
439
449
|
- lib/core/ingestion/es_sink.rb
|
450
|
+
- lib/core/job_cleanup.rb
|
440
451
|
- lib/core/jobs/consumer.rb
|
441
452
|
- lib/core/jobs/producer.rb
|
442
453
|
- lib/core/native_scheduler.rb
|
@@ -470,7 +481,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
470
481
|
licenses:
|
471
482
|
- Elastic-2.0
|
472
483
|
metadata: {}
|
473
|
-
post_install_message:
|
484
|
+
post_install_message:
|
474
485
|
rdoc_options: []
|
475
486
|
require_paths:
|
476
487
|
- lib
|
@@ -481,12 +492,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
481
492
|
version: '0'
|
482
493
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
483
494
|
requirements:
|
484
|
-
- - "
|
495
|
+
- - ">="
|
485
496
|
- !ruby/object:Gem::Version
|
486
|
-
version:
|
497
|
+
version: '0'
|
487
498
|
requirements: []
|
488
499
|
rubygems_version: 3.0.3.1
|
489
|
-
signing_key:
|
500
|
+
signing_key:
|
490
501
|
specification_version: 4
|
491
502
|
summary: Gem containing Elastic connectors service
|
492
503
|
test_files: []
|
@@ -1,34 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
# frozen_string_literal: true
|
7
|
-
|
8
|
-
require 'utility/logger'
|
9
|
-
|
10
|
-
module Connectors
|
11
|
-
module Base
|
12
|
-
class AdvancedSnippetValidator
|
13
|
-
|
14
|
-
def initialize(advanced_snippet)
|
15
|
-
@advanced_snippet = advanced_snippet || {}
|
16
|
-
end
|
17
|
-
|
18
|
-
def is_snippet_valid?
|
19
|
-
raise 'Advanced Snippet validation not implemented'
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def log_validation_result(validation_result)
|
25
|
-
Utility::Logger.info("Filtering Advanced Configuration validation result: #{validation_result[:state]}")
|
26
|
-
if validation_result[:errors].present?
|
27
|
-
validation_result[:errors].each do |error|
|
28
|
-
Utility::Logger.warn("Validation error for: '#{error[:ids]}': '#{error[:messages]}'")
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|