connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -8
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/config.rb +3 -0
  5. data/lib/app/dispatcher.rb +44 -17
  6. data/lib/app/preflight_check.rb +11 -0
  7. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  8. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  9. data/lib/connectors/base/connector.rb +43 -14
  10. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  11. data/lib/connectors/example/connector.rb +6 -0
  12. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  13. data/lib/connectors/gitlab/connector.rb +6 -1
  14. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  15. data/lib/connectors/mongodb/connector.rb +47 -43
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  17. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  18. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  19. data/lib/connectors/sync_status.rb +6 -1
  20. data/lib/connectors/tolerable_error_helper.rb +43 -0
  21. data/lib/connectors_app/// +13 -0
  22. data/lib/core/configuration.rb +3 -1
  23. data/lib/core/connector_job.rb +210 -0
  24. data/lib/core/connector_settings.rb +52 -16
  25. data/lib/core/elastic_connector_actions.rb +320 -59
  26. data/lib/core/filtering/post_process_engine.rb +39 -0
  27. data/lib/core/filtering/post_process_result.rb +27 -0
  28. data/lib/core/filtering/simple_rule.rb +141 -0
  29. data/lib/core/filtering/validation_job_runner.rb +53 -0
  30. data/lib/core/filtering/validation_status.rb +17 -0
  31. data/lib/core/filtering.rb +17 -0
  32. data/lib/core/ingestion/es_sink.rb +118 -0
  33. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  34. data/lib/core/jobs/consumer.rb +132 -0
  35. data/lib/core/jobs/producer.rb +26 -0
  36. data/lib/core/scheduler.rb +40 -10
  37. data/lib/core/single_scheduler.rb +1 -1
  38. data/lib/core/sync_job_runner.rb +80 -16
  39. data/lib/core.rb +4 -0
  40. data/lib/utility/bulk_queue.rb +87 -0
  41. data/lib/utility/constants.rb +7 -0
  42. data/lib/utility/error_monitor.rb +108 -0
  43. data/lib/utility/errors.rb +0 -12
  44. data/lib/utility/filtering.rb +22 -0
  45. data/lib/utility/logger.rb +1 -1
  46. data/lib/utility.rb +11 -4
  47. metadata +31 -12
  48. data/lib/core/output_sink/base_sink.rb +0 -33
  49. data/lib/core/output_sink/combined_sink.rb +0 -38
  50. data/lib/core/output_sink/console_sink.rb +0 -51
  51. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,16 +21,29 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
22
- def initialize(connector_settings)
24
+ JOB_REPORTING_INTERVAL = 10
25
+
26
+ def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @sink = Core::Ingestion::EsSink.new(
29
+ connector_settings.index_name,
30
+ @connector_settings.request_pipeline,
31
+ Utility::BulkQueue.new(
32
+ max_ingestion_queue_size,
33
+ max_ingestion_queue_bytes
34
+ ),
35
+ max_ingestion_queue_bytes
36
+ )
25
37
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
38
  @sync_finished = false
39
+ @sync_error = nil
27
40
  @status = {
28
41
  :indexed_document_count => 0,
29
42
  :deleted_document_count => 0,
43
+ :indexed_document_volume => 0,
30
44
  :error => nil
31
45
  }
46
+ @job = job
32
47
  end
33
48
 
34
49
  def execute
@@ -41,8 +56,17 @@ module Core
41
56
  def do_sync!
42
57
  Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
58
 
44
- job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
45
- job_id = job_description['_id']
59
+ # connector service doesn't support multiple jobs running simultaneously
60
+ raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
61
+
62
+ Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
63
+
64
+ # claim the job
65
+ @job.make_running!
66
+
67
+ job_description = @job.es_source
68
+ job_id = @job.id
69
+ job_description['_id'] = job_id
46
70
 
47
71
  unless job_id.present?
48
72
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +76,10 @@ module Core
52
76
  begin
53
77
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
54
78
 
79
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
80
+ validate_filtering(job_description.dig(:connector, :filtering))
81
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
82
+
55
83
  connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
56
84
 
57
85
  connector_instance.do_health_check!
@@ -61,11 +89,21 @@ module Core
61
89
 
62
90
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
63
91
 
92
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
93
+ reporting_cycle_start = Time.now
94
+ Utility::Logger.info('Yielding documents...')
64
95
  connector_instance.yield_documents do |document|
65
96
  document = add_ingest_metadata(document)
66
- @sink.ingest(document)
67
- incoming_ids << document['id']
68
- @status[:indexed_document_count] += 1
97
+ post_process_result = post_processing_engine.process(document)
98
+ if post_process_result.is_include?
99
+ @sink.ingest(document)
100
+ incoming_ids << document['id']
101
+ end
102
+
103
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
104
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
105
+ reporting_cycle_start = Time.now
106
+ end
69
107
  end
70
108
 
71
109
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -74,7 +112,11 @@ module Core
74
112
 
75
113
  ids_to_delete.each do |id|
76
114
  @sink.delete(id)
77
- @status[:deleted_document_count] += 1
115
+
116
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
117
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
118
+ reporting_cycle_start = Time.now
119
+ end
78
120
  end
79
121
 
80
122
  @sink.flush
@@ -83,22 +125,34 @@ module Core
83
125
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
84
126
  @sync_finished = true
85
127
  rescue StandardError => e
86
- @status[:error] = e.message
128
+ @sync_error = e.message
87
129
  Utility::ExceptionTracking.log_exception(e)
88
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
89
130
  ensure
131
+ stats = @sink.ingestion_stats
132
+
133
+ Utility::Logger.debug("Sync stats are: #{stats}")
134
+
135
+ @status[:indexed_document_count] = stats[:indexed_document_count]
136
+ @status[:deleted_document_count] = stats[:deleted_document_count]
137
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
138
+
90
139
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
91
140
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
92
141
 
93
142
  # Make sure to not override a previous error message
94
- if !@sync_finished && @status[:error].nil?
95
- @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
143
+ if !@sync_finished && @sync_error.nil?
144
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
145
+ end
146
+
147
+ unless connector_instance.nil?
148
+ metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
149
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
96
150
  end
97
151
 
98
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
152
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
99
153
 
100
- if @status[:error]
101
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
154
+ if @sync_error
155
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
102
156
  else
103
157
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
104
158
  end
@@ -119,5 +173,15 @@ module Core
119
173
 
120
174
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
121
175
  end
176
+
177
+ def validate_filtering(filtering)
178
+ validation_result = @connector_class.validate_filtering(filtering)
179
+
180
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
181
+ raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
182
+
183
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
184
+ raise errors_present_error if validation_result[:errors].present?
185
+ end
122
186
  end
123
187
  end
data/lib/core.rb CHANGED
@@ -7,10 +7,14 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
15
17
  require 'core/native_scheduler'
16
18
  require 'core/sync_job_runner'
19
+ require 'core/jobs/producer'
20
+ require 'core/jobs/consumer'
@@ -0,0 +1,87 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
+ require 'utility/constants'
10
+
11
+ module Utility
12
+ class BulkQueue
13
+ class QueueOverflowError < StandardError; end
14
+
15
+ # 500 items or 5MB
16
+ def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
17
+ @operation_count_threshold = operation_count_threshold.freeze
18
+ @size_threshold = size_threshold.freeze
19
+
20
+ @buffer = ''
21
+
22
+ @current_operation_count = 0
23
+
24
+ @current_buffer_size = 0
25
+ @current_data_size = 0
26
+ end
27
+
28
+ def pop_all
29
+ result = @buffer
30
+
31
+ reset
32
+
33
+ result
34
+ end
35
+
36
+ def add(operation, payload = nil)
37
+ raise QueueOverflowError unless will_fit?(operation, payload)
38
+
39
+ operation_size = get_size(operation)
40
+ payload_size = get_size(payload)
41
+
42
+ @current_operation_count += 1
43
+ @current_buffer_size += operation_size
44
+ @current_buffer_size += payload_size
45
+ @current_data_size += payload_size
46
+
47
+ @buffer << operation
48
+ @buffer << "\n"
49
+
50
+ if payload
51
+ @buffer << payload
52
+ @buffer << "\n"
53
+ end
54
+ end
55
+
56
+ def will_fit?(operation, payload = nil)
57
+ return false if @current_operation_count + 1 > @operation_count_threshold
58
+
59
+ operation_size = get_size(operation)
60
+ payload_size = get_size(payload)
61
+
62
+ @current_buffer_size + operation_size + payload_size < @size_threshold
63
+ end
64
+
65
+ def current_stats
66
+ {
67
+ :current_operation_count => @current_operation_count,
68
+ :current_buffer_size => @current_buffer_size
69
+ }
70
+ end
71
+
72
+ private
73
+
74
+ def get_size(str)
75
+ return 0 unless str
76
+ str.bytesize
77
+ end
78
+
79
+ def reset
80
+ @current_operation_count = 0
81
+ @current_buffer_size = 0
82
+ @current_data_size = 0
83
+
84
+ @buffer = ''
85
+ end
86
+ end
87
+ end
@@ -16,5 +16,12 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
21
+
22
+ # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
23
+ DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
24
+ # Maximum size of either whole BULK Elasticsearch operation or one document in it
25
+ DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
19
26
  end
20
27
  end
@@ -0,0 +1,108 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'utility/errors'
11
+ require 'utility/exception_tracking'
12
+
13
+ module Utility
14
+ class ErrorMonitor
15
+ class MonitoringError < StandardError
16
+ attr_accessor :tripped_by
17
+
18
+ def initialize(message = nil, tripped_by: nil)
19
+ super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
20
+ @tripped_by = tripped_by
21
+ end
22
+ end
23
+
24
+ class MaxSuccessiveErrorsExceededError < MonitoringError; end
25
+ class MaxErrorsExceededError < MonitoringError; end
26
+ class MaxErrorsInWindowExceededError < MonitoringError; end
27
+
28
+ attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
29
+
30
+ def initialize(
31
+ max_errors: 1000,
32
+ max_consecutive_errors: 10,
33
+ max_error_ratio: 0.15,
34
+ window_size: 100,
35
+ error_queue_size: 20
36
+ )
37
+ @max_errors = max_errors
38
+ @max_consecutive_errors = max_consecutive_errors
39
+ @max_error_ratio = max_error_ratio
40
+ @window_size = window_size
41
+ @total_error_count = 0
42
+ @success_count = 0
43
+ @consecutive_error_count = 0
44
+ @window_errors = Array.new(window_size) { false }
45
+ @window_index = 0
46
+ @last_error = nil
47
+ @error_queue_size = error_queue_size
48
+ @error_queue = []
49
+ end
50
+
51
+ def note_success
52
+ @consecutive_error_count = 0
53
+ @success_count += 1
54
+ increment_window_index
55
+ end
56
+
57
+ def note_error(error, id: Time.now.to_i)
58
+ stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
59
+ error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
60
+ Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
61
+ @total_error_count += 1
62
+ @consecutive_error_count += 1
63
+ @window_errors[@window_index] = true
64
+ @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
65
+ @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
66
+ increment_window_index
67
+ @last_error = error
68
+
69
+ raise_if_necessary
70
+ end
71
+
72
+ def finalize
73
+ total_documents = @total_error_count + @success_count
74
+ if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
75
+ raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
76
+ end
77
+ end
78
+
79
+ private
80
+
81
+ def raise_if_necessary
82
+ error =
83
+ if @consecutive_error_count > @max_consecutive_errors
84
+ MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
85
+ elsif @total_error_count > @max_errors
86
+ MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
87
+ elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
88
+ MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
89
+ end
90
+
91
+ raise_with_last_cause(error) if error
92
+ end
93
+
94
+ def num_errors_in_window
95
+ @window_errors.count(&:itself).to_f
96
+ end
97
+
98
+ def increment_window_index
99
+ @window_index = (@window_index + 1) % @window_size
100
+ end
101
+
102
+ def raise_with_last_cause(error)
103
+ raise @last_error
104
+ rescue StandardError
105
+ raise error
106
+ end
107
+ end
108
+ end
@@ -60,18 +60,6 @@ module Utility
60
60
  class JobDocumentLimitError < StandardError; end
61
61
  class JobClaimingError < StandardError; end
62
62
 
63
- class MonitoringError < StandardError
64
- attr_accessor :tripped_by
65
-
66
- def initialize(message = nil, tripped_by: nil)
67
- super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
68
- @tripped_by = tripped_by
69
- end
70
- end
71
- class MaxSuccessiveErrorsExceededError < MonitoringError; end
72
- class MaxErrorsExceededError < MonitoringError; end
73
- class MaxErrorsInWindowExceededError < MonitoringError; end
74
-
75
63
  class JobSyncNotPossibleYetError < StandardError
76
64
  attr_accessor :sync_will_be_possible_at
77
65
 
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Utility
10
+ class Filtering
11
+ class << self
12
+ def extract_filter(filtering)
13
+ return {} unless filtering.present?
14
+
15
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
16
+ filter = filtering.is_a?(Array) ? filtering.first : filtering
17
+
18
+ filter.present? ? filter : {}
19
+ end
20
+ end
21
+ end
22
+ end
@@ -23,7 +23,7 @@ module Utility
23
23
  end
24
24
 
25
25
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
26
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
27
  end
28
28
 
29
29
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,21 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
9
+ require 'utility/bulk_queue'
10
+ require 'utility/common'
7
11
  require 'utility/constants'
8
12
  require 'utility/cron'
9
- require 'utility/common'
13
+ require 'utility/elasticsearch/index/mappings'
14
+ require 'utility/elasticsearch/index/text_analysis_settings'
15
+ require 'utility/environment'
16
+ require 'utility/error_monitor'
10
17
  require 'utility/errors'
18
+ require 'utility/filtering'
11
19
  require 'utility/es_client'
12
- require 'utility/environment'
13
20
  require 'utility/exception_tracking'
14
21
  require 'utility/extension_mapping_util'
15
22
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4
4
+ version: 8.7.0.0.pre.20221117T010623Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-04 00:00:00.000000000 Z
11
+ date: 2022-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -396,34 +396,50 @@ files:
396
396
  - lib/app/version.rb
397
397
  - lib/connectors.rb
398
398
  - lib/connectors/base/adapter.rb
399
+ - lib/connectors/base/advanced_snippet_against_schema_validator.rb
400
+ - lib/connectors/base/advanced_snippet_validator.rb
399
401
  - lib/connectors/base/connector.rb
400
402
  - lib/connectors/base/custom_client.rb
403
+ - lib/connectors/base/simple_rules_parser.rb
401
404
  - lib/connectors/connector_status.rb
402
405
  - lib/connectors/crawler/scheduler.rb
403
406
  - lib/connectors/example/attachments/first_attachment.txt
404
407
  - lib/connectors/example/attachments/second_attachment.txt
405
408
  - lib/connectors/example/attachments/third_attachment.txt
406
409
  - lib/connectors/example/connector.rb
410
+ - lib/connectors/example/example_advanced_snippet_validator.rb
407
411
  - lib/connectors/gitlab/adapter.rb
408
412
  - lib/connectors/gitlab/connector.rb
409
413
  - lib/connectors/gitlab/custom_client.rb
410
414
  - lib/connectors/gitlab/extractor.rb
415
+ - lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
411
416
  - lib/connectors/mongodb/connector.rb
417
+ - lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
418
+ - lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
419
+ - lib/connectors/mongodb/mongo_rules_parser.rb
412
420
  - lib/connectors/registry.rb
413
421
  - lib/connectors/sync_status.rb
422
+ - lib/connectors/tolerable_error_helper.rb
423
+ - lib/connectors_app/\
414
424
  - lib/connectors_service.rb
415
425
  - lib/connectors_utility.rb
416
426
  - lib/core.rb
417
427
  - lib/core/configuration.rb
428
+ - lib/core/connector_job.rb
418
429
  - lib/core/connector_settings.rb
419
430
  - lib/core/elastic_connector_actions.rb
431
+ - lib/core/filtering.rb
432
+ - lib/core/filtering/post_process_engine.rb
433
+ - lib/core/filtering/post_process_result.rb
434
+ - lib/core/filtering/simple_rule.rb
435
+ - lib/core/filtering/validation_job_runner.rb
436
+ - lib/core/filtering/validation_status.rb
420
437
  - lib/core/heartbeat.rb
438
+ - lib/core/ingestion.rb
439
+ - lib/core/ingestion/es_sink.rb
440
+ - lib/core/jobs/consumer.rb
441
+ - lib/core/jobs/producer.rb
421
442
  - lib/core/native_scheduler.rb
422
- - lib/core/output_sink.rb
423
- - lib/core/output_sink/base_sink.rb
424
- - lib/core/output_sink/combined_sink.rb
425
- - lib/core/output_sink/console_sink.rb
426
- - lib/core/output_sink/es_sink.rb
427
443
  - lib/core/scheduler.rb
428
444
  - lib/core/single_scheduler.rb
429
445
  - lib/core/sync_job_runner.rb
@@ -432,6 +448,7 @@ files:
432
448
  - lib/stubs/connectors/stats.rb
433
449
  - lib/stubs/service_type.rb
434
450
  - lib/utility.rb
451
+ - lib/utility/bulk_queue.rb
435
452
  - lib/utility/common.rb
436
453
  - lib/utility/constants.rb
437
454
  - lib/utility/cron.rb
@@ -439,10 +456,12 @@ files:
439
456
  - lib/utility/elasticsearch/index/mappings.rb
440
457
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
441
458
  - lib/utility/environment.rb
459
+ - lib/utility/error_monitor.rb
442
460
  - lib/utility/errors.rb
443
461
  - lib/utility/es_client.rb
444
462
  - lib/utility/exception_tracking.rb
445
463
  - lib/utility/extension_mapping_util.rb
464
+ - lib/utility/filtering.rb
446
465
  - lib/utility/logger.rb
447
466
  - lib/utility/middleware/basic_auth.rb
448
467
  - lib/utility/middleware/bearer_auth.rb
@@ -451,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
451
470
  licenses:
452
471
  - Elastic-2.0
453
472
  metadata: {}
454
- post_install_message:
473
+ post_install_message:
455
474
  rdoc_options: []
456
475
  require_paths:
457
476
  - lib
@@ -462,12 +481,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
462
481
  version: '0'
463
482
  required_rubygems_version: !ruby/object:Gem::Requirement
464
483
  requirements:
465
- - - ">="
484
+ - - ">"
466
485
  - !ruby/object:Gem::Version
467
- version: '0'
486
+ version: 1.3.1
468
487
  requirements: []
469
488
  rubygems_version: 3.0.3.1
470
- signing_key:
489
+ signing_key:
471
490
  specification_version: 4
472
491
  summary: Gem containing Elastic connectors service
473
492
  test_files: []
@@ -1,33 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Core
10
- module OutputSink
11
- class BaseSink
12
- def ingest(_document)
13
- raise 'not implemented'
14
- end
15
-
16
- def ingest_multiple(_documents)
17
- raise 'not implemented'
18
- end
19
-
20
- def delete(_id)
21
- raise 'not implemented'
22
- end
23
-
24
- def delete_multiple(_ids)
25
- raise 'not implemented'
26
- end
27
-
28
- def flush(_size: nil)
29
- raise 'not implemented'
30
- end
31
- end
32
- end
33
- end
@@ -1,38 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink/base_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class CombinedSink < Core::OutputSink::BaseSink
14
- def initialize(sinks = [])
15
- @sinks = sinks
16
- end
17
-
18
- def ingest(document)
19
- @sinks.each { |sink| sink.ingest(document) }
20
- end
21
-
22
- def flush(size: nil)
23
- @sinks.each { |sink| sink.flush(size: size) }
24
- end
25
-
26
- def ingest_multiple(documents)
27
- @sinks.each { |sink| sink.ingest_multiple(documents) }
28
- end
29
-
30
- def delete(id)
31
- @sinks.each { |sink| sink.delete(id) }
32
- end
33
-
34
- def delete_multiple(ids)
35
- @sinks.each { |sink| sink.delete_multiple(ids) }
36
- end
37
- end
38
- end