connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0

Files changed (55)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +10 -8
  3. data/lib/app/config.rb +6 -1
  4. data/lib/app/console_app.rb +1 -1
  5. data/lib/app/dispatcher.rb +18 -3
  6. data/lib/connectors/base/connector.rb +39 -22
  7. data/lib/connectors/crawler/scheduler.rb +36 -0
  8. data/lib/connectors/example/connector.rb +2 -2
  9. data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
  10. data/lib/connectors/gitlab/connector.rb +4 -4
  11. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
  12. data/lib/{connectors_app/\ → connectors/job_trigger_method.rb} +6 -5
  13. data/lib/connectors/mongodb/connector.rb +66 -56
  14. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
  17. data/lib/connectors/registry.rb +1 -1
  18. data/lib/connectors/tolerable_error_helper.rb +5 -1
  19. data/lib/connectors_utility.rb +6 -3
  20. data/lib/core/configuration.rb +13 -1
  21. data/lib/core/connector_job.rb +48 -7
  22. data/lib/core/connector_settings.rb +52 -20
  23. data/lib/core/elastic_connector_actions.rb +54 -38
  24. data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
  25. data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
  26. data/lib/core/filtering/filter_validator.rb +103 -0
  27. data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
  28. data/lib/core/filtering/post_process_engine.rb +2 -2
  29. data/lib/core/filtering/processing_stage.rb +20 -0
  30. data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
  31. data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
  32. data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
  33. data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
  34. data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
  35. data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
  36. data/lib/core/filtering/transform/filter_transformer.rb +26 -0
  37. data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
  38. data/lib/core/filtering/transform/transformation_target.rb +10 -0
  39. data/lib/core/filtering/validation_job_runner.rb +1 -3
  40. data/lib/core/filtering.rb +5 -3
  41. data/lib/core/job_cleanup.rb +66 -0
  42. data/lib/core/jobs/consumer.rb +62 -64
  43. data/lib/core/jobs/producer.rb +3 -0
  44. data/lib/core/scheduler.rb +67 -52
  45. data/lib/core/sync_job_runner.rb +170 -83
  46. data/lib/core.rb +1 -0
  47. data/lib/utility/bulk_queue.rb +1 -1
  48. data/lib/utility/constants.rb +0 -2
  49. data/lib/utility/error_monitor.rb +26 -5
  50. data/lib/utility/es_client.rb +4 -0
  51. data/lib/utility/filtering.rb +4 -0
  52. metadata +32 -21
  53. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  54. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  55. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
data/lib/core/sync_job_runner.rb CHANGED
@@ -8,6 +8,7 @@
 
  require 'connectors/connector_status'
  require 'connectors/registry'
+ require 'connectors/sync_status'
  require 'core/filtering/post_process_engine'
  require 'core/ingestion'
  require 'core/filtering/validation_status'
@@ -20,13 +21,42 @@ module Core
  end
  end
 
+ class ConnectorNotFoundError < StandardError
+ def initialize(connector_id)
+ super("Connector is not found for connector ID '#{connector_id}'.")
+ end
+ end
+
+ class ConnectorJobNotFoundError < StandardError
+ def initialize(job_id)
+ super("Connector job is not found for job ID '#{job_id}'.")
+ end
+ end
+
+ class ConnectorJobCanceledError < StandardError
+ def initialize(job_id)
+ super("Connector job (ID: '#{job_id}') is requested to be canceled.")
+ end
+ end
+
+ class ConnectorJobNotRunningError < StandardError
+ def initialize(job_id, status)
+ super("Connector job (ID: '#{job_id}') is not running but in status of '#{status}'.")
+ end
+ end
+
  class SyncJobRunner
  JOB_REPORTING_INTERVAL = 10
 
  def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
  @connector_settings = connector_settings
+ @connector_id = connector_settings.id
+ @index_name = job.index_name
+ @service_type = job.service_type
+ @job = job
+ @job_id = job.id
  @sink = Core::Ingestion::EsSink.new(
- connector_settings.index_name,
+ @index_name,
  @connector_settings.request_pipeline,
  Utility::BulkQueue.new(
  max_ingestion_queue_size,
@@ -34,16 +64,7 @@ module Core
  ),
  max_ingestion_queue_bytes
  )
- @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
- @sync_finished = false
- @sync_error = nil
- @status = {
- :indexed_document_count => 0,
- :deleted_document_count => 0,
- :indexed_document_volume => 0,
- :error => nil
- }
- @job = job
+ @connector_class = Connectors::REGISTRY.connector_class(@service_type)
  end
 
  def execute
@@ -54,134 +75,200 @@ module Core
  private
 
  def do_sync!
- Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
-
- # connector service doesn't support multiple jobs running simultaneously
- raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
-
- Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
-
- # claim the job
- @job.make_running!
-
- job_description = @job.es_source
- job_id = @job.id
- job_description['_id'] = job_id
-
- unless job_id.present?
- Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
- return
- end
+ return unless claim_job!
 
  begin
- Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
-
- Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
- validate_filtering(job_description.dig(:connector, :filtering))
- Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
+ # We want to validate advanced filtering rules even if basic rules are disabled
+ if @connector_settings.any_filtering_feature_enabled?
+ Utility::Logger.info("Checking active filtering for sync job #{@job_id} for connector #{@connector_id}.")
+ validate_filtering(@job.filtering)
+ Utility::Logger.debug("Active filtering for sync job #{@job_id} for connector #{@connector_id} is valid.")
+ end
 
- connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
+ @connector_instance = Connectors::REGISTRY.connector(@service_type, @connector_settings.configuration, job_description: @job)
+ @connector_instance.do_health_check!
 
- connector_instance.do_health_check!
+ @sync_status = nil
+ @sync_error = nil
+ @reporting_cycle_start = Time.now
 
  incoming_ids = []
- existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)
-
- Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
-
- post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
- reporting_cycle_start = Time.now
- Utility::Logger.info('Yielding documents...')
- connector_instance.yield_documents do |document|
- document = add_ingest_metadata(document)
- post_process_result = post_processing_engine.process(document)
- if post_process_result.is_include?
- @sink.ingest(document)
- incoming_ids << document['id']
- end
+ existing_ids = ElasticConnectorActions.fetch_document_ids(@index_name)
 
- if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
- ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
- reporting_cycle_start = Time.now
- end
+ Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@index_name}.")
+
+ post_processing_engine = @connector_settings.filtering_rule_feature_enabled? ? Core::Filtering::PostProcessEngine.new(@job.filtering) : nil
+
+ yield_docs do |document|
+ next if post_processing_engine && !post_processing_engine.process(document).is_include?
+ @sink.ingest(document)
+ incoming_ids << document['id']
  end
 
  ids_to_delete = existing_ids - incoming_ids.uniq
 
- Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
+ Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@index_name}.")
 
  ids_to_delete.each do |id|
  @sink.delete(id)
 
- if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
- ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
- reporting_cycle_start = Time.now
+ periodically do
+ check_job
+ @job.update_metadata(@sink.ingestion_stats, @connector_instance.metadata)
  end
  end
 
  @sink.flush
 
+ # force check at the end
+ check_job
+
  # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
- @sync_finished = true
+ @sync_status = Connectors::SyncStatus::COMPLETED
+ @sync_error = nil
+ rescue ConnectorNotFoundError, ConnectorJobNotFoundError, ConnectorJobNotRunningError => e
+ Utility::Logger.error(e.message)
+ @sync_status = Connectors::SyncStatus::ERROR
+ @sync_error = e.message
+ rescue ConnectorJobCanceledError => e
+ Utility::Logger.error(e.message)
+ @sync_status = Connectors::SyncStatus::CANCELED
+ # Cancelation is an expected action and we shouldn't log an error
+ @sync_error = nil
  rescue StandardError => e
+ @sync_status = Connectors::SyncStatus::ERROR
  @sync_error = e.message
  Utility::ExceptionTracking.log_exception(e)
  ensure
  stats = @sink.ingestion_stats
 
  Utility::Logger.debug("Sync stats are: #{stats}")
-
- @status[:indexed_document_count] = stats[:indexed_document_count]
- @status[:deleted_document_count] = stats[:deleted_document_count]
- @status[:indexed_document_volume] = stats[:indexed_document_volume]
-
- Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
- Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
+ Utility::Logger.info("Upserted #{stats[:indexed_document_count]} documents into #{@index_name}.")
+ Utility::Logger.info("Deleted #{stats[:deleted_document_count]} documents into #{@index_name}.")
 
  # Make sure to not override a previous error message
- if !@sync_finished && @sync_error.nil?
- @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+ @sync_status ||= Connectors::SyncStatus::ERROR
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.' if @sync_status == Connectors::SyncStatus::ERROR && @sync_error.nil?
+
+ # update job if it's still present
+ if reload_job!
+ case @sync_status
+ when Connectors::SyncStatus::COMPLETED
+ @job.done!(stats, @connector_instance&.metadata)
+ when Connectors::SyncStatus::CANCELED
+ @job.cancel!(stats, @connector_instance&.metadata)
+ when Connectors::SyncStatus::ERROR
+ @job.error!(@sync_error, stats, @connector_instance&.metadata)
+ else
+ Utility::Logger.error("The job is supposed to be in one of the terminal statuses (#{Connectors::SyncStatus::TERMINAL_STATUSES.join(', ')}), but it's #{@sync_status}")
+ @sync_status = Connectors::SyncStatus::ERROR
+ @sync_error = 'The job is not ended as expected for unknown reason'
+ @job.error!(@sync_error, stats, @connector_instance&.metadata)
+ end
+ # need to reload the job to get the latest job status
+ reload_job!
  end
 
- unless connector_instance.nil?
- metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
- metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
+ # update connector if it's still present
+ if reload_connector!
+ @connector_settings.update_last_sync!(@job)
  end
 
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
+ Utility::Logger.info("Completed the job (ID: #{@job_id}) with status: #{@sync_status}#{@sync_error ? " and error: #{@sync_error}" : ''}")
+ end
+ end
+
+ def yield_docs
+ @connector_instance.yield_documents do |document|
+ document = add_ingest_metadata(document)
 
- if @sync_error
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
- else
- Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
+ yield(document) if block_given?
+
+ periodically do
+ check_job
+ @job.update_metadata(@sink.ingestion_stats, @connector_instance.metadata)
  end
  end
  end
 
+ def claim_job!
+ Utility::Logger.info("Claiming job (ID: #{@job_id}) for connector (ID: #{@connector_id}).")
+
+ # connector service doesn't support multiple jobs running simultaneously
+ if @connector_settings.running?
+ Utility::Logger.warn("Failed to claim job (ID: #{@job_id}) for connector (ID: #{@connector_id}), there are already jobs running.")
+ return false
+ end
+
+ begin
+ Core::ElasticConnectorActions.update_connector_sync_start(@connector_id)
+
+ @job.make_running!
+
+ Utility::Logger.info("Successfully claimed job (ID: #{@job_id}) for connector (ID: #{@connector_id}).")
+ true
+ rescue StandardError => e
+ Utility::ExceptionTracking.log_exception(e)
+ Utility::Logger.error("Failed to claim job (ID: #{@job_id}) for connector (ID: #{@connector_id}). Please check the logs for the cause of this error.")
+ false
+ end
+ end
+
  def add_ingest_metadata(document)
+ return document unless @job
  document.tap do |it|
- it['_extract_binary_content'] = @connector_settings.extract_binary_content? if @connector_settings.extract_binary_content?
- it['_reduce_whitespace'] = @connector_settings.reduce_whitespace? if @connector_settings.reduce_whitespace?
- it['_run_ml_inference'] = @connector_settings.run_ml_inference? if @connector_settings.run_ml_inference?
+ it['_extract_binary_content'] = @job.extract_binary_content? if @job.extract_binary_content?
+ it['_reduce_whitespace'] = @job.reduce_whitespace? if @job.reduce_whitespace?
+ it['_run_ml_inference'] = @job.run_ml_inference? if @job.run_ml_inference?
  end
  end
 
  def validate_configuration!
  expected_fields = @connector_class.configurable_fields.keys.map(&:to_s).sort
- actual_fields = @connector_settings.configuration.keys.map(&:to_s).sort
+ actual_fields = @job.configuration.keys.map(&:to_s).sort
 
- raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
+ raise IncompatibleConfigurableFieldsError.new(@service_type, expected_fields, actual_fields) if expected_fields != actual_fields
  end
 
  def validate_filtering(filtering)
  validation_result = @connector_class.validate_filtering(filtering)
 
- wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_id}. Please check active filtering in connectors index.")
  raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
 
- errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_id}. Please check active filtering in connectors index.")
  raise errors_present_error if validation_result[:errors].present?
  end
+
+ def periodically
+ return if Time.now - @reporting_cycle_start < JOB_REPORTING_INTERVAL
+
+ yield if block_given?
+
+ @reporting_cycle_start = Time.now
+ end
+
+ def check_job
+ # raise error if the connector is deleted
+ raise ConnectorNotFoundError.new(@connector_id) unless reload_connector!
+
+ # raise error if the job is deleted
+ raise ConnectorJobNotFoundError.new(@job_id) unless reload_job!
+
+ # raise error if the job is canceled
+ raise ConnectorJobCanceledError.new(@job_id) if @job.canceling?
+
+ # raise error if the job is not in the status in_progress
+ raise ConnectorJobNotRunningError.new(@job_id, @job.status) unless @job.in_progress?
+ end
+
+ def reload_job!
+ @job = ConnectorJob.fetch_by_id(@job_id)
+ end
+
+ def reload_connector!
+ @connector_settings = ConnectorSettings.fetch_by_id(@connector_id)
+ end
  end
 end
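
Taken together, the rewritten runner claims the job explicitly, reports progress through the job document on a fixed interval, and always resolves the job to a terminal status (completed, canceled, or error). Below is a minimal sketch of how a caller might drive it, assuming placeholder IDs; the fetch helpers are the ones referenced by reload_job!/reload_connector! above, and the queue thresholds are assumed from the "500 items or 5MB" comment.

```ruby
# Hedged sketch, not part of the gem: drive a single sync with the reworked runner.
require 'core'

connector_settings = Core::ConnectorSettings.fetch_by_id('my-connector-id') # placeholder ID
job = Core::ConnectorJob.fetch_by_id('my-job-id')                           # placeholder ID

runner = Core::SyncJobRunner.new(
  connector_settings,
  job,
  500,             # max_ingestion_queue_size
  5 * 1024 * 1024  # max_ingestion_queue_bytes, assuming the "5MB" comment
)

# execute claims the job, streams documents, and lands the job in a terminal
# status; a cancelation requested via the job document is picked up by check_job
# during the periodic reporting cycle and surfaces as SyncStatus::CANCELED.
runner.execute
```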
data/lib/core.rb CHANGED
@@ -12,6 +12,7 @@ require 'core/connector_settings'
  require 'core/elastic_connector_actions'
  require 'core/filtering'
  require 'core/heartbeat'
+ require 'core/job_cleanup'
  require 'core/scheduler'
  require 'core/single_scheduler'
  require 'core/native_scheduler'
data/lib/utility/bulk_queue.rb CHANGED
@@ -13,7 +13,7 @@ module Utility
  class QueueOverflowError < StandardError; end
 
  # 500 items or 5MB
- def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
+ def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
  @operation_count_threshold = operation_count_threshold.freeze
  @size_threshold = size_threshold.freeze
 
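
The one-line change points the default at a constant that actually exists in Utility::Constants (DEFAULT_MAX_INGESTION_QUEUE_SIZE, shown in the next hunk). A small usage sketch, with the byte threshold assumed from the "500 items or 5MB" comment rather than shown in this diff:

```ruby
# Not from the gem; illustrates the corrected defaults.
require 'utility/bulk_queue'

queue  = Utility::BulkQueue.new                   # 500 operations / ~5MB thresholds
custom = Utility::BulkQueue.new(100, 1_048_576)   # per-sink overrides are still possible
```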
data/lib/utility/constants.rb CHANGED
@@ -16,8 +16,6 @@ module Utility
  JOB_INDEX = '.elastic-connectors-sync-jobs'
  CONTENT_INDEX_PREFIX = 'search-'
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
- FILTERING_RULES_FEATURE = 'filtering_rules'
- FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
 
  # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
  DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
data/lib/utility/error_monitor.rb CHANGED
@@ -51,7 +51,7 @@ module Utility
  def note_success
  @consecutive_error_count = 0
  @success_count += 1
- increment_window_index
+ track_window_error(false)
  end
 
  def note_error(error, id: Time.now.to_i)
@@ -60,10 +60,9 @@
  Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
  @total_error_count += 1
  @consecutive_error_count += 1
- @window_errors[@window_index] = true
  @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
  @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
- increment_window_index
+ track_window_error(true)
  @last_error = error
 
  raise_if_necessary
@@ -92,10 +91,32 @@
  end
 
  def num_errors_in_window
- @window_errors.count(&:itself).to_f
+ @window_errors.count(true).to_f
  end
 
- def increment_window_index
+ def track_window_error(is_error)
+ # We keep the errors array of the size @window_size this way, imagine @window_size = 5
+ # Error array inits as falses:
+ # [ false, false, false, false, false ]
+ # Third document raises an error:
+ # [ false, false, true, false, false ]
+ # ^^^^
+ # 2 % 5 == 2
+ # Fifth document raises an error:
+ # [ false, false, true, false, true ]
+ # ^^^^
+ # 4 % 5 == 4
+ # Sixth document raises an error:
+ # [ true, false, true, false, true ]
+ # ^^^^
+ # 5 % 5 == 0
+ #
+ # Eigth document is successful:
+ # [ true, false, false, false, true ]
+ # ^^^^^
+ # 7 % 5 == 2
+ # And so on.
+ @window_errors[@window_index] = is_error
  @window_index = (@window_index + 1) % @window_size
  end
 
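
The worked example in the new comment can be reproduced standalone. The sketch below (not part of the gem) shows the same modulo bookkeeping and why num_errors_in_window only counts the most recent window_size outcomes:

```ruby
# Standalone illustration of the sliding error window: a fixed-size array indexed
# modulo the window size, where each slot records whether a recent document errored.
window_size = 5
window_errors = Array.new(window_size, false)
window_index = 0

track = lambda do |is_error|
  window_errors[window_index] = is_error
  window_index = (window_index + 1) % window_size
end

# Outcomes for eight documents (true = error), mirroring the comment above.
[false, false, true, false, true, true, false, false].each { |e| track.call(e) }

puts window_errors.inspect      # => [true, false, false, false, true]
puts window_errors.count(true)  # => 2: only the last 5 outcomes are retained
```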
data/lib/utility/es_client.rb CHANGED
@@ -43,6 +43,10 @@ module Utility
  configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
  configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
 
+ # headers
+ # these are necessary for cloud-hosted native connectors
+ configs[:headers] = es_config[:headers].to_h if es_config[:headers]
+
  # if log or trace is activated, we use the application logger
  configs[:logger] = if configs[:log] || configs[:trace]
  Utility::Logger.logger
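
The new option forwards an optional headers section of the Elasticsearch configuration to the client. The sketch below is illustrative only; the header name and config shape are assumptions, not taken from this diff:

```ruby
# Hedged sketch: extra HTTP headers from the es config end up on the client configs.
es_config = {
  hosts: 'https://localhost:9200',
  headers: { 'X-Custom-Header' => 'some-value' } # illustrative header, not from the gem
}

configs = {}
configs[:headers] = es_config[:headers].to_h if es_config[:headers]
# configs is then handed to the Elasticsearch client that Utility::EsClient builds.
```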
data/lib/utility/filtering.rb CHANGED
@@ -17,6 +17,10 @@ module Utility
 
  filter.present? ? filter : {}
  end
+
+ def rule_pre_processing_active?(filter)
+ !filter.dig('advanced_snippet', 'value')&.present?
+ end
  end
  end
  end
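
The helper reads as a double negation, so a standalone illustration (not from the gem) may help: simple-rule pre-processing is considered active only when the filter carries no advanced snippet value.

```ruby
require 'active_support/core_ext/object/blank' # provides #present?

def rule_pre_processing_active?(filter)
  !filter.dig('advanced_snippet', 'value')&.present?
end

# The 'find' payload below is purely illustrative.
rule_pre_processing_active?('advanced_snippet' => { 'value' => { 'find' => {} } }) # => false
rule_pre_processing_active?('advanced_snippet' => { 'value' => {} })               # => true
rule_pre_processing_active?({})                                                    # => true
```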
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: connectors_service
  version: !ruby/object:Gem::Version
- version: 8.7.0.0.pre.20221117T010623Z
+ version: 8.11.0.0
  platform: ruby
  authors:
  - Elastic
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-11-17 00:00:00.000000000 Z
+ date: 2023-11-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: activesupport
@@ -16,14 +16,14 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 5.2.6
+ version: 6.1.7.3
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 5.2.6
+ version: 6.1.7.3
  - !ruby/object:Gem::Dependency
  name: attr_extras
  requirement: !ruby/object:Gem::Requirement
@@ -198,14 +198,14 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 8.5.0
+ version: 8.8.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 8.5.0
+ version: 8.8.0
  - !ruby/object:Gem::Dependency
  name: faraday
  requirement: !ruby/object:Gem::Requirement
@@ -350,16 +350,16 @@ dependencies:
  name: tzinfo
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '2.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '2.0'
  - !ruby/object:Gem::Dependency
  name: tzinfo-data
  requirement: !ruby/object:Gem::Requirement
@@ -396,11 +396,8 @@ files:
  - lib/app/version.rb
  - lib/connectors.rb
  - lib/connectors/base/adapter.rb
- - lib/connectors/base/advanced_snippet_against_schema_validator.rb
- - lib/connectors/base/advanced_snippet_validator.rb
  - lib/connectors/base/connector.rb
  - lib/connectors/base/custom_client.rb
- - lib/connectors/base/simple_rules_parser.rb
  - lib/connectors/connector_status.rb
  - lib/connectors/crawler/scheduler.rb
  - lib/connectors/example/attachments/first_attachment.txt
@@ -413,14 +410,14 @@ files:
  - lib/connectors/gitlab/custom_client.rb
  - lib/connectors/gitlab/extractor.rb
  - lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
+ - lib/connectors/job_trigger_method.rb
  - lib/connectors/mongodb/connector.rb
  - lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
  - lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
- - lib/connectors/mongodb/mongo_rules_parser.rb
+ - lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb
  - lib/connectors/registry.rb
  - lib/connectors/sync_status.rb
  - lib/connectors/tolerable_error_helper.rb
- - lib/connectors_app/\
  - lib/connectors_service.rb
  - lib/connectors_utility.rb
  - lib/core.rb
@@ -429,14 +426,28 @@ files:
  - lib/core/connector_settings.rb
  - lib/core/elastic_connector_actions.rb
  - lib/core/filtering.rb
+ - lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb
+ - lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb
+ - lib/core/filtering/filter_validator.rb
+ - lib/core/filtering/hash_against_schema_validator.rb
  - lib/core/filtering/post_process_engine.rb
  - lib/core/filtering/post_process_result.rb
- - lib/core/filtering/simple_rule.rb
+ - lib/core/filtering/processing_stage.rb
+ - lib/core/filtering/simple_rules/simple_rule.rb
+ - lib/core/filtering/simple_rules/simple_rules_parser.rb
+ - lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb
+ - lib/core/filtering/simple_rules/validation/simple_rules_schema.rb
+ - lib/core/filtering/simple_rules/validation/simple_rules_validator.rb
+ - lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb
+ - lib/core/filtering/transform/filter_transformer.rb
+ - lib/core/filtering/transform/filter_transformer_facade.rb
+ - lib/core/filtering/transform/transformation_target.rb
  - lib/core/filtering/validation_job_runner.rb
  - lib/core/filtering/validation_status.rb
  - lib/core/heartbeat.rb
  - lib/core/ingestion.rb
  - lib/core/ingestion/es_sink.rb
+ - lib/core/job_cleanup.rb
  - lib/core/jobs/consumer.rb
  - lib/core/jobs/producer.rb
  - lib/core/native_scheduler.rb
@@ -470,7 +481,7 @@ homepage: https://github.com/elastic/connectors-ruby
  licenses:
  - Elastic-2.0
  metadata: {}
- post_install_message:
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -481,12 +492,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 1.3.1
+ version: '0'
  requirements: []
  rubygems_version: 3.0.3.1
- signing_key:
+ signing_key:
  specification_version: 4
  summary: Gem containing Elastic connectors service
  test_files: []
data/lib/connectors/base/advanced_snippet_validator.rb DELETED
@@ -1,34 +0,0 @@
- #
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- # or more contributor license agreements. Licensed under the Elastic License;
- # you may not use this file except in compliance with the Elastic License.
- #
- # frozen_string_literal: true
-
- require 'utility/logger'
-
- module Connectors
- module Base
- class AdvancedSnippetValidator
-
- def initialize(advanced_snippet)
- @advanced_snippet = advanced_snippet || {}
- end
-
- def is_snippet_valid?
- raise 'Advanced Snippet validation not implemented'
- end
-
- private
-
- def log_validation_result(validation_result)
- Utility::Logger.info("Filtering Advanced Configuration validation result: #{validation_result[:state]}")
- if validation_result[:errors].present?
- validation_result[:errors].each do |error|
- Utility::Logger.warn("Validation error for: '#{error[:ids]}': '#{error[:messages]}'")
- end
- end
- end
- end
- end
- end