connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T004928Z

Files changed (51)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -8
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/config.rb +3 -0
  5. data/lib/app/dispatcher.rb +44 -17
  6. data/lib/app/preflight_check.rb +11 -0
  7. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  8. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  9. data/lib/connectors/base/connector.rb +43 -14
  10. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  11. data/lib/connectors/example/connector.rb +6 -0
  12. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  13. data/lib/connectors/gitlab/connector.rb +6 -1
  14. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  15. data/lib/connectors/mongodb/connector.rb +47 -43
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  17. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  18. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  19. data/lib/connectors/sync_status.rb +6 -1
  20. data/lib/connectors/tolerable_error_helper.rb +43 -0
  21. data/lib/connectors_app/\ +13 -0
  22. data/lib/core/configuration.rb +3 -1
  23. data/lib/core/connector_job.rb +210 -0
  24. data/lib/core/connector_settings.rb +52 -16
  25. data/lib/core/elastic_connector_actions.rb +320 -59
  26. data/lib/core/filtering/post_process_engine.rb +39 -0
  27. data/lib/core/filtering/post_process_result.rb +27 -0
  28. data/lib/core/filtering/simple_rule.rb +141 -0
  29. data/lib/core/filtering/validation_job_runner.rb +53 -0
  30. data/lib/core/filtering/validation_status.rb +17 -0
  31. data/lib/core/filtering.rb +17 -0
  32. data/lib/core/ingestion/es_sink.rb +118 -0
  33. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  34. data/lib/core/jobs/consumer.rb +132 -0
  35. data/lib/core/jobs/producer.rb +26 -0
  36. data/lib/core/scheduler.rb +40 -10
  37. data/lib/core/single_scheduler.rb +1 -1
  38. data/lib/core/sync_job_runner.rb +80 -16
  39. data/lib/core.rb +4 -0
  40. data/lib/utility/bulk_queue.rb +87 -0
  41. data/lib/utility/constants.rb +7 -0
  42. data/lib/utility/error_monitor.rb +108 -0
  43. data/lib/utility/errors.rb +0 -12
  44. data/lib/utility/filtering.rb +22 -0
  45. data/lib/utility/logger.rb +1 -1
  46. data/lib/utility.rb +11 -4
  47. metadata +31 -12
  48. data/lib/core/output_sink/base_sink.rb +0 -33
  49. data/lib/core/output_sink/combined_sink.rb +0 -38
  50. data/lib/core/output_sink/console_sink.rb +0 -51
  51. data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/sync_job_runner.rb CHANGED
@@ -8,7 +8,9 @@
 
 require 'connectors/connector_status'
 require 'connectors/registry'
-require 'core/output_sink'
+require 'core/filtering/post_process_engine'
+require 'core/ingestion'
+require 'core/filtering/validation_status'
 require 'utility'
 
 module Core
@@ -19,16 +21,29 @@ module Core
   end
 
   class SyncJobRunner
-    def initialize(connector_settings)
+    JOB_REPORTING_INTERVAL = 10
+
+    def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
       @connector_settings = connector_settings
-      @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
+      @sink = Core::Ingestion::EsSink.new(
+        connector_settings.index_name,
+        @connector_settings.request_pipeline,
+        Utility::BulkQueue.new(
+          max_ingestion_queue_size,
+          max_ingestion_queue_bytes
+        ),
+        max_ingestion_queue_bytes
+      )
       @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
       @sync_finished = false
+      @sync_error = nil
       @status = {
         :indexed_document_count => 0,
         :deleted_document_count => 0,
+        :indexed_document_volume => 0,
         :error => nil
       }
+      @job = job
     end
 
     def execute
@@ -41,8 +56,17 @@
     def do_sync!
       Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
 
-      job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
-      job_id = job_description['_id']
+      # connector service doesn't support multiple jobs running simultaneously
+      raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
+
+      Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
+
+      # claim the job
+      @job.make_running!
+
+      job_description = @job.es_source
+      job_id = @job.id
+      job_description['_id'] = job_id
 
       unless job_id.present?
         Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +76,10 @@ module Core
       begin
         Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
 
+        Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
+        validate_filtering(job_description.dig(:connector, :filtering))
+        Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
+
         connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
 
         connector_instance.do_health_check!
@@ -61,11 +89,21 @@ module Core
 
         Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
 
+        post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
+        reporting_cycle_start = Time.now
+        Utility::Logger.info('Yielding documents...')
         connector_instance.yield_documents do |document|
           document = add_ingest_metadata(document)
-          @sink.ingest(document)
-          incoming_ids << document['id']
-          @status[:indexed_document_count] += 1
+          post_process_result = post_processing_engine.process(document)
+          if post_process_result.is_include?
+            @sink.ingest(document)
+            incoming_ids << document['id']
+          end
+
+          if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+            ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
+            reporting_cycle_start = Time.now
+          end
         end
 
         ids_to_delete = existing_ids - incoming_ids.uniq
@@ -74,7 +112,11 @@ module Core
 
         ids_to_delete.each do |id|
           @sink.delete(id)
-          @status[:deleted_document_count] += 1
+
+          if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+            ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
+            reporting_cycle_start = Time.now
+          end
         end
 
         @sink.flush
@@ -83,22 +125,34 @@ module Core
         # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
         @sync_finished = true
       rescue StandardError => e
-        @status[:error] = e.message
+        @sync_error = e.message
         Utility::ExceptionTracking.log_exception(e)
-        ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
       ensure
+        stats = @sink.ingestion_stats
+
+        Utility::Logger.debug("Sync stats are: #{stats}")
+
+        @status[:indexed_document_count] = stats[:indexed_document_count]
+        @status[:deleted_document_count] = stats[:deleted_document_count]
+        @status[:indexed_document_volume] = stats[:indexed_document_volume]
+
         Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
         Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
 
         # Make sure to not override a previous error message
-        if !@sync_finished && @status[:error].nil?
-          @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+        if !@sync_finished && @sync_error.nil?
+          @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+        end
+
+        unless connector_instance.nil?
+          metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
+          metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
         end
 
-        ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
+        ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
 
-        if @status[:error]
-          Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
+        if @sync_error
+          Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
         else
           Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
         end
@@ -119,5 +173,15 @@ module Core
 
       raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
     end
+
+    def validate_filtering(filtering)
+      validation_result = @connector_class.validate_filtering(filtering)
+
+      wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+      raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
+
+      errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+      raise errors_present_error if validation_result[:errors].present?
+    end
   end
 end
data/lib/core.rb CHANGED
@@ -7,10 +7,14 @@
 # frozen_string_literal: true
 
 require 'core/configuration'
+require 'core/connector_job'
 require 'core/connector_settings'
 require 'core/elastic_connector_actions'
+require 'core/filtering'
 require 'core/heartbeat'
 require 'core/scheduler'
 require 'core/single_scheduler'
 require 'core/native_scheduler'
 require 'core/sync_job_runner'
+require 'core/jobs/producer'
+require 'core/jobs/consumer'
data/lib/utility/bulk_queue.rb ADDED
@@ -0,0 +1,87 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+require 'json'
+
+require 'utility/constants'
+
+module Utility
+  class BulkQueue
+    class QueueOverflowError < StandardError; end
+
+    # 500 items or 5MB
+    def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
+      @operation_count_threshold = operation_count_threshold.freeze
+      @size_threshold = size_threshold.freeze
+
+      @buffer = ''
+
+      @current_operation_count = 0
+
+      @current_buffer_size = 0
+      @current_data_size = 0
+    end
+
+    def pop_all
+      result = @buffer
+
+      reset
+
+      result
+    end
+
+    def add(operation, payload = nil)
+      raise QueueOverflowError unless will_fit?(operation, payload)
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_operation_count += 1
+      @current_buffer_size += operation_size
+      @current_buffer_size += payload_size
+      @current_data_size += payload_size
+
+      @buffer << operation
+      @buffer << "\n"
+
+      if payload
+        @buffer << payload
+        @buffer << "\n"
+      end
+    end
+
+    def will_fit?(operation, payload = nil)
+      return false if @current_operation_count + 1 > @operation_count_threshold
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_buffer_size + operation_size + payload_size < @size_threshold
+    end
+
+    def current_stats
+      {
+        :current_operation_count => @current_operation_count,
+        :current_buffer_size => @current_buffer_size
+      }
+    end
+
+    private
+
+    def get_size(str)
+      return 0 unless str
+      str.bytesize
+    end
+
+    def reset
+      @current_operation_count = 0
+      @current_buffer_size = 0
+      @current_data_size = 0
+
+      @buffer = ''
+    end
+  end
+end
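For orientation, the new Utility::BulkQueue accumulates NDJSON-style operation/payload pairs until a flush is due. A minimal usage sketch, assuming the gem is on the load path (send_bulk is a hypothetical stand-in for the Elasticsearch _bulk request, not part of the gem):

require 'json'
require 'utility/bulk_queue'

# hypothetical stand-in for an Elasticsearch _bulk request
def send_bulk(ndjson)
  puts "flushing #{ndjson.lines.count} NDJSON lines"
end

queue = Utility::BulkQueue.new(500, 5 * 1024 * 1024)

[{ 'id' => '1', 'title' => 'hello' }, { 'id' => '2', 'title' => 'world' }].each do |doc|
  op = { 'index' => { '_id' => doc['id'] } }.to_json
  payload = doc.to_json

  # drain the queue before it would overflow, then enqueue the pair
  send_bulk(queue.pop_all) unless queue.will_fit?(op, payload)
  queue.add(op, payload)
end

send_bulk(queue.pop_all)
puts queue.current_stats # => { :current_operation_count => 0, :current_buffer_size => 0 } after pop_all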
data/lib/utility/constants.rb CHANGED
@@ -16,5 +16,12 @@ module Utility
     JOB_INDEX = '.elastic-connectors-sync-jobs'
     CONTENT_INDEX_PREFIX = 'search-'
     CRAWLER_SERVICE_TYPE = 'elastic-crawler'
+    FILTERING_RULES_FEATURE = 'filtering_rules'
+    FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
+
+    # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
+    DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
+    # Maximum size of either whole BULK Elasticsearch operation or one document in it
+    DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
   end
 end
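For reference, the new byte threshold works out to 5 MiB, pairing with the 500-operation cap above:

5 * 1024 * 1024 # => 5_242_880 bytes (5 MiB)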
data/lib/utility/error_monitor.rb ADDED
@@ -0,0 +1,108 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'time'
+require 'utility/errors'
+require 'utility/exception_tracking'
+
+module Utility
+  class ErrorMonitor
+    class MonitoringError < StandardError
+      attr_accessor :tripped_by
+
+      def initialize(message = nil, tripped_by: nil)
+        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
+        @tripped_by = tripped_by
+      end
+    end
+
+    class MaxSuccessiveErrorsExceededError < MonitoringError; end
+    class MaxErrorsExceededError < MonitoringError; end
+    class MaxErrorsInWindowExceededError < MonitoringError; end
+
+    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
+
+    def initialize(
+      max_errors: 1000,
+      max_consecutive_errors: 10,
+      max_error_ratio: 0.15,
+      window_size: 100,
+      error_queue_size: 20
+    )
+      @max_errors = max_errors
+      @max_consecutive_errors = max_consecutive_errors
+      @max_error_ratio = max_error_ratio
+      @window_size = window_size
+      @total_error_count = 0
+      @success_count = 0
+      @consecutive_error_count = 0
+      @window_errors = Array.new(window_size) { false }
+      @window_index = 0
+      @last_error = nil
+      @error_queue_size = error_queue_size
+      @error_queue = []
+    end
+
+    def note_success
+      @consecutive_error_count = 0
+      @success_count += 1
+      increment_window_index
+    end
+
+    def note_error(error, id: Time.now.to_i)
+      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
+      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
+      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
+      @total_error_count += 1
+      @consecutive_error_count += 1
+      @window_errors[@window_index] = true
+      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
+      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
+      increment_window_index
+      @last_error = error
+
+      raise_if_necessary
+    end
+
+    def finalize
+      total_documents = @total_error_count + @success_count
+      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
+        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
+      end
+    end
+
+    private
+
+    def raise_if_necessary
+      error =
+        if @consecutive_error_count > @max_consecutive_errors
+          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
+        elsif @total_error_count > @max_errors
+          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
+        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
+          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
+        end
+
+      raise_with_last_cause(error) if error
+    end
+
+    def num_errors_in_window
+      @window_errors.count(&:itself).to_f
+    end
+
+    def increment_window_index
+      @window_index = (@window_index + 1) % @window_size
+    end
+
+    def raise_with_last_cause(error)
+      raise @last_error
+    rescue StandardError
+      raise error
+    end
+  end
+end
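A minimal usage sketch of the new Utility::ErrorMonitor, assuming the gem and its dependencies are installed; the thresholds are lowered here only to make the trip easy to see:

require 'utility'

monitor = Utility::ErrorMonitor.new(max_consecutive_errors: 2)

begin
  monitor.note_success
  3.times do |i|
    raise StandardError, "doc #{i} failed"
  rescue StandardError => doc_error
    monitor.note_error(doc_error, id: i)
  end
rescue Utility::ErrorMonitor::MaxSuccessiveErrorsExceededError => e
  puts e.message                # includes "Tripped by - StandardError: doc 2 failed"
  puts monitor.error_queue.size # => 3
end

begin
  monitor.finalize              # checks the overall error ratio against max_error_ratio (0.15 by default)
rescue Utility::ErrorMonitor::MaxErrorsInWindowExceededError => e
  puts e.message                # 3 errors out of 4 documents exceeds the ratio
end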
data/lib/utility/errors.rb CHANGED
@@ -60,18 +60,6 @@ module Utility
   class JobDocumentLimitError < StandardError; end
   class JobClaimingError < StandardError; end
 
-  class MonitoringError < StandardError
-    attr_accessor :tripped_by
-
-    def initialize(message = nil, tripped_by: nil)
-      super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
-      @tripped_by = tripped_by
-    end
-  end
-  class MaxSuccessiveErrorsExceededError < MonitoringError; end
-  class MaxErrorsExceededError < MonitoringError; end
-  class MaxErrorsInWindowExceededError < MonitoringError; end
-
   class JobSyncNotPossibleYetError < StandardError
     attr_accessor :sync_will_be_possible_at
 
data/lib/utility/filtering.rb ADDED
@@ -0,0 +1,22 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+module Utility
+  class Filtering
+    class << self
+      def extract_filter(filtering)
+        return {} unless filtering.present?
+
+        # assume for now, that first object in filtering array or a filter object itself is the only filtering object
+        filter = filtering.is_a?(Array) ? filtering.first : filtering
+
+        filter.present? ? filter : {}
+      end
+    end
+  end
+end
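A minimal usage sketch of the new Utility::Filtering.extract_filter helper; it assumes ActiveSupport's Object#present? is loaded, as it is everywhere inside the gem:

require 'active_support/core_ext/object/blank'
require 'utility/filtering'

active = [{ 'rules' => [], 'advanced_snippet' => {} }, { 'rules' => [] }]

Utility::Filtering.extract_filter(active)       # => first element of the array
Utility::Filtering.extract_filter(active.first) # => the same hash, passed through
Utility::Filtering.extract_filter(nil)          # => {}
Utility::Filtering.extract_filter([])           # => {}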
data/lib/utility/logger.rb CHANGED
@@ -23,7 +23,7 @@ module Utility
     end
 
     def logger
-      @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
+      @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
     end
 
     SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,21 @@
 # you may not use this file except in compliance with the Elastic License.
 #
 
+# !!!!!!!!
+# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
+require 'utility/bulk_queue'
+require 'utility/common'
 require 'utility/constants'
 require 'utility/cron'
-require 'utility/common'
+require 'utility/elasticsearch/index/mappings'
+require 'utility/elasticsearch/index/text_analysis_settings'
+require 'utility/environment'
+require 'utility/error_monitor'
 require 'utility/errors'
+require 'utility/filtering'
 require 'utility/es_client'
-require 'utility/environment'
 require 'utility/exception_tracking'
 require 'utility/extension_mapping_util'
 require 'utility/logger'
-require 'utility/elasticsearch/index/mappings'
-require 'utility/elasticsearch/index/text_analysis_settings'
+# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
+# !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: connectors_service
 version: !ruby/object:Gem::Version
-  version: 8.6.0.4
+  version: 8.7.0.0.pre.20221117T004928Z
 platform: ruby
 authors:
 - Elastic
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-11-04 00:00:00.000000000 Z
+date: 2022-11-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -396,34 +396,50 @@ files:
 - lib/app/version.rb
 - lib/connectors.rb
 - lib/connectors/base/adapter.rb
+- lib/connectors/base/advanced_snippet_against_schema_validator.rb
+- lib/connectors/base/advanced_snippet_validator.rb
 - lib/connectors/base/connector.rb
 - lib/connectors/base/custom_client.rb
+- lib/connectors/base/simple_rules_parser.rb
 - lib/connectors/connector_status.rb
 - lib/connectors/crawler/scheduler.rb
 - lib/connectors/example/attachments/first_attachment.txt
 - lib/connectors/example/attachments/second_attachment.txt
 - lib/connectors/example/attachments/third_attachment.txt
 - lib/connectors/example/connector.rb
+- lib/connectors/example/example_advanced_snippet_validator.rb
 - lib/connectors/gitlab/adapter.rb
 - lib/connectors/gitlab/connector.rb
 - lib/connectors/gitlab/custom_client.rb
 - lib/connectors/gitlab/extractor.rb
+- lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
 - lib/connectors/mongodb/connector.rb
+- lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
+- lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
+- lib/connectors/mongodb/mongo_rules_parser.rb
 - lib/connectors/registry.rb
 - lib/connectors/sync_status.rb
+- lib/connectors/tolerable_error_helper.rb
+- lib/connectors_app/\
 - lib/connectors_service.rb
 - lib/connectors_utility.rb
 - lib/core.rb
 - lib/core/configuration.rb
+- lib/core/connector_job.rb
 - lib/core/connector_settings.rb
 - lib/core/elastic_connector_actions.rb
+- lib/core/filtering.rb
+- lib/core/filtering/post_process_engine.rb
+- lib/core/filtering/post_process_result.rb
+- lib/core/filtering/simple_rule.rb
+- lib/core/filtering/validation_job_runner.rb
+- lib/core/filtering/validation_status.rb
 - lib/core/heartbeat.rb
+- lib/core/ingestion.rb
+- lib/core/ingestion/es_sink.rb
+- lib/core/jobs/consumer.rb
+- lib/core/jobs/producer.rb
 - lib/core/native_scheduler.rb
-- lib/core/output_sink.rb
-- lib/core/output_sink/base_sink.rb
-- lib/core/output_sink/combined_sink.rb
-- lib/core/output_sink/console_sink.rb
-- lib/core/output_sink/es_sink.rb
 - lib/core/scheduler.rb
 - lib/core/single_scheduler.rb
 - lib/core/sync_job_runner.rb
@@ -432,6 +448,7 @@ files:
 - lib/stubs/connectors/stats.rb
 - lib/stubs/service_type.rb
 - lib/utility.rb
+- lib/utility/bulk_queue.rb
 - lib/utility/common.rb
 - lib/utility/constants.rb
 - lib/utility/cron.rb
@@ -439,10 +456,12 @@ files:
 - lib/utility/elasticsearch/index/mappings.rb
 - lib/utility/elasticsearch/index/text_analysis_settings.rb
 - lib/utility/environment.rb
+- lib/utility/error_monitor.rb
 - lib/utility/errors.rb
 - lib/utility/es_client.rb
 - lib/utility/exception_tracking.rb
 - lib/utility/extension_mapping_util.rb
+- lib/utility/filtering.rb
 - lib/utility/logger.rb
 - lib/utility/middleware/basic_auth.rb
 - lib/utility/middleware/bearer_auth.rb
@@ -451,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
 licenses:
 - Elastic-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -462,12 +481,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
 rubygems_version: 3.0.3.1
-signing_key:
+signing_key:
 specification_version: 4
 summary: Gem containing Elastic connectors service
 test_files: []
data/lib/core/output_sink/base_sink.rb DELETED
@@ -1,33 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-module Core
-  module OutputSink
-    class BaseSink
-      def ingest(_document)
-        raise 'not implemented'
-      end
-
-      def ingest_multiple(_documents)
-        raise 'not implemented'
-      end
-
-      def delete(_id)
-        raise 'not implemented'
-      end
-
-      def delete_multiple(_ids)
-        raise 'not implemented'
-      end
-
-      def flush(_size: nil)
-        raise 'not implemented'
-      end
-    end
-  end
-end
data/lib/core/output_sink/combined_sink.rb DELETED
@@ -1,38 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-require 'core/output_sink/base_sink'
-require 'utility/logger'
-
-module Core::OutputSink
-  class CombinedSink < Core::OutputSink::BaseSink
-    def initialize(sinks = [])
-      @sinks = sinks
-    end
-
-    def ingest(document)
-      @sinks.each { |sink| sink.ingest(document) }
-    end
-
-    def flush(size: nil)
-      @sinks.each { |sink| sink.flush(size: size) }
-    end
-
-    def ingest_multiple(documents)
-      @sinks.each { |sink| sink.ingest_multiple(documents) }
-    end
-
-    def delete(id)
-      @sinks.each { |sink| sink.delete(id) }
-    end
-
-    def delete_multiple(ids)
-      @sinks.each { |sink| sink.delete_multiple(ids) }
-    end
-  end
-end