connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/dispatcher.rb +42 -17
  5. data/lib/app/preflight_check.rb +11 -0
  6. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  7. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  8. data/lib/connectors/base/connector.rb +43 -14
  9. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  10. data/lib/connectors/example/connector.rb +6 -0
  11. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  12. data/lib/connectors/gitlab/connector.rb +6 -1
  13. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  14. data/lib/connectors/mongodb/connector.rb +47 -43
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  17. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  18. data/lib/connectors/sync_status.rb +6 -1
  19. data/lib/connectors/tolerable_error_helper.rb +43 -0
  20. data/lib/core/configuration.rb +3 -1
  21. data/lib/core/connector_job.rb +210 -0
  22. data/lib/core/connector_settings.rb +52 -16
  23. data/lib/core/elastic_connector_actions.rb +320 -59
  24. data/lib/core/filtering/post_process_engine.rb +39 -0
  25. data/lib/core/filtering/post_process_result.rb +27 -0
  26. data/lib/core/filtering/simple_rule.rb +141 -0
  27. data/lib/core/filtering/validation_job_runner.rb +53 -0
  28. data/lib/core/filtering/validation_status.rb +17 -0
  29. data/lib/core/filtering.rb +17 -0
  30. data/lib/core/ingestion/es_sink.rb +118 -0
  31. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  32. data/lib/core/jobs/consumer.rb +114 -0
  33. data/lib/core/jobs/producer.rb +26 -0
  34. data/lib/core/scheduler.rb +40 -10
  35. data/lib/core/single_scheduler.rb +1 -1
  36. data/lib/core/sync_job_runner.rb +72 -16
  37. data/lib/core.rb +4 -0
  38. data/lib/utility/bulk_queue.rb +85 -0
  39. data/lib/utility/constants.rb +2 -0
  40. data/lib/utility/error_monitor.rb +108 -0
  41. data/lib/utility/errors.rb +0 -12
  42. data/lib/utility/filtering.rb +22 -0
  43. data/lib/utility/logger.rb +1 -1
  44. data/lib/utility.rb +11 -4
  45. metadata +25 -7
  46. data/lib/core/output_sink/base_sink.rb +0 -33
  47. data/lib/core/output_sink/combined_sink.rb +0 -38
  48. data/lib/core/output_sink/console_sink.rb +0 -51
  49. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,16 +21,21 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
22
- def initialize(connector_settings)
24
+ JOB_REPORTING_INTERVAL = 10
25
+
26
+ def initialize(connector_settings, job)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @sink = Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
25
29
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
30
  @sync_finished = false
31
+ @sync_error = nil
27
32
  @status = {
28
33
  :indexed_document_count => 0,
29
34
  :deleted_document_count => 0,
35
+ :indexed_document_volume => 0,
30
36
  :error => nil
31
37
  }
38
+ @job = job
32
39
  end
33
40
 
34
41
  def execute
@@ -41,8 +48,17 @@ module Core
41
48
  def do_sync!
42
49
  Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
50
 
44
- job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
45
- job_id = job_description['_id']
51
+ # connector service doesn't support multiple jobs running simultaneously
52
+ raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
53
+
54
+ Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
55
+
56
+ # claim the job
57
+ @job.make_running!
58
+
59
+ job_description = @job.es_source
60
+ job_id = @job.id
61
+ job_description['_id'] = job_id
46
62
 
47
63
  unless job_id.present?
48
64
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +68,10 @@ module Core
52
68
  begin
53
69
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
54
70
 
71
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
72
+ validate_filtering(job_description.dig(:connector, :filtering))
73
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
74
+
55
75
  connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
56
76
 
57
77
  connector_instance.do_health_check!
@@ -61,11 +81,21 @@ module Core
61
81
 
62
82
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
63
83
 
84
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
85
+ reporting_cycle_start = Time.now
86
+ Utility::Logger.info('Yielding documents...')
64
87
  connector_instance.yield_documents do |document|
65
88
  document = add_ingest_metadata(document)
66
- @sink.ingest(document)
67
- incoming_ids << document['id']
68
- @status[:indexed_document_count] += 1
89
+ post_process_result = post_processing_engine.process(document)
90
+ if post_process_result.is_include?
91
+ @sink.ingest(document)
92
+ incoming_ids << document['id']
93
+ end
94
+
95
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
96
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
97
+ reporting_cycle_start = Time.now
98
+ end
69
99
  end
70
100
 
71
101
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -74,7 +104,11 @@ module Core
74
104
 
75
105
  ids_to_delete.each do |id|
76
106
  @sink.delete(id)
77
- @status[:deleted_document_count] += 1
107
+
108
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
109
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
110
+ reporting_cycle_start = Time.now
111
+ end
78
112
  end
79
113
 
80
114
  @sink.flush
@@ -83,22 +117,34 @@ module Core
83
117
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
84
118
  @sync_finished = true
85
119
  rescue StandardError => e
86
- @status[:error] = e.message
120
+ @sync_error = e.message
87
121
  Utility::ExceptionTracking.log_exception(e)
88
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
89
122
  ensure
123
+ stats = @sink.ingestion_stats
124
+
125
+ Utility::Logger.debug("Sync stats are: #{stats}")
126
+
127
+ @status[:indexed_document_count] = stats[:indexed_document_count]
128
+ @status[:deleted_document_count] = stats[:deleted_document_count]
129
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
130
+
90
131
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
91
132
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
92
133
 
93
134
  # Make sure to not override a previous error message
94
- if !@sync_finished && @status[:error].nil?
95
- @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
135
+ if !@sync_finished && @sync_error.nil?
136
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
137
+ end
138
+
139
+ unless connector_instance.nil?
140
+ metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
141
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
96
142
  end
97
143
 
98
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
144
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
99
145
 
100
- if @status[:error]
101
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
146
+ if @sync_error
147
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
102
148
  else
103
149
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
104
150
  end
@@ -119,5 +165,15 @@ module Core
119
165
 
120
166
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
121
167
  end
168
+
169
+ def validate_filtering(filtering)
170
+ validation_result = @connector_class.validate_filtering(filtering)
171
+
172
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
173
+ raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
174
+
175
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
176
+ raise errors_present_error if validation_result[:errors].present?
177
+ end
122
178
  end
123
179
  end
data/lib/core.rb CHANGED
@@ -7,10 +7,14 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
15
17
  require 'core/native_scheduler'
16
18
  require 'core/sync_job_runner'
19
+ require 'core/jobs/producer'
20
+ require 'core/jobs/consumer'
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
+ module Utility
10
+ class BulkQueue
11
+ class QueueOverflowError < StandardError; end
12
+
13
+ # 500 items or 5MB
14
+ def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
15
+ @operation_count_threshold = operation_count_threshold.freeze
16
+ @size_threshold = size_threshold.freeze
17
+
18
+ @buffer = ''
19
+
20
+ @current_operation_count = 0
21
+
22
+ @current_buffer_size = 0
23
+ @current_data_size = 0
24
+ end
25
+
26
+ def pop_all
27
+ result = @buffer
28
+
29
+ reset
30
+
31
+ result
32
+ end
33
+
34
+ def add(operation, payload = nil)
35
+ raise QueueOverflowError unless will_fit?(operation, payload)
36
+
37
+ operation_size = get_size(operation)
38
+ payload_size = get_size(payload)
39
+
40
+ @current_operation_count += 1
41
+ @current_buffer_size += operation_size
42
+ @current_buffer_size += payload_size
43
+ @current_data_size += payload_size
44
+
45
+ @buffer << operation
46
+ @buffer << "\n"
47
+
48
+ if payload
49
+ @buffer << payload
50
+ @buffer << "\n"
51
+ end
52
+ end
53
+
54
+ def will_fit?(operation, payload = nil)
55
+ return false if @current_operation_count + 1 > @operation_count_threshold
56
+
57
+ operation_size = get_size(operation)
58
+ payload_size = get_size(payload)
59
+
60
+ @current_buffer_size + operation_size + payload_size < @size_threshold
61
+ end
62
+
63
+ def current_stats
64
+ {
65
+ :current_operation_count => @current_operation_count,
66
+ :current_buffer_size => @current_buffer_size
67
+ }
68
+ end
69
+
70
+ private
71
+
72
+ def get_size(str)
73
+ return 0 unless str
74
+ str.bytesize
75
+ end
76
+
77
+ def reset
78
+ @current_operation_count = 0
79
+ @current_buffer_size = 0
80
+ @current_data_size = 0
81
+
82
+ @buffer = ''
83
+ end
84
+ end
85
+ end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end
@@ -0,0 +1,108 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'utility/errors'
11
+ require 'utility/exception_tracking'
12
+
13
+ module Utility
14
+ class ErrorMonitor
15
+ class MonitoringError < StandardError
16
+ attr_accessor :tripped_by
17
+
18
+ def initialize(message = nil, tripped_by: nil)
19
+ super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
20
+ @tripped_by = tripped_by
21
+ end
22
+ end
23
+
24
+ class MaxSuccessiveErrorsExceededError < MonitoringError; end
25
+ class MaxErrorsExceededError < MonitoringError; end
26
+ class MaxErrorsInWindowExceededError < MonitoringError; end
27
+
28
+ attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
29
+
30
+ def initialize(
31
+ max_errors: 1000,
32
+ max_consecutive_errors: 10,
33
+ max_error_ratio: 0.15,
34
+ window_size: 100,
35
+ error_queue_size: 20
36
+ )
37
+ @max_errors = max_errors
38
+ @max_consecutive_errors = max_consecutive_errors
39
+ @max_error_ratio = max_error_ratio
40
+ @window_size = window_size
41
+ @total_error_count = 0
42
+ @success_count = 0
43
+ @consecutive_error_count = 0
44
+ @window_errors = Array.new(window_size) { false }
45
+ @window_index = 0
46
+ @last_error = nil
47
+ @error_queue_size = error_queue_size
48
+ @error_queue = []
49
+ end
50
+
51
+ def note_success
52
+ @consecutive_error_count = 0
53
+ @success_count += 1
54
+ increment_window_index
55
+ end
56
+
57
+ def note_error(error, id: Time.now.to_i)
58
+ stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
59
+ error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
60
+ Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
61
+ @total_error_count += 1
62
+ @consecutive_error_count += 1
63
+ @window_errors[@window_index] = true
64
+ @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
65
+ @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
66
+ increment_window_index
67
+ @last_error = error
68
+
69
+ raise_if_necessary
70
+ end
71
+
72
+ def finalize
73
+ total_documents = @total_error_count + @success_count
74
+ if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
75
+ raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
76
+ end
77
+ end
78
+
79
+ private
80
+
81
+ def raise_if_necessary
82
+ error =
83
+ if @consecutive_error_count > @max_consecutive_errors
84
+ MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
85
+ elsif @total_error_count > @max_errors
86
+ MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
87
+ elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
88
+ MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
89
+ end
90
+
91
+ raise_with_last_cause(error) if error
92
+ end
93
+
94
+ def num_errors_in_window
95
+ @window_errors.count(&:itself).to_f
96
+ end
97
+
98
+ def increment_window_index
99
+ @window_index = (@window_index + 1) % @window_size
100
+ end
101
+
102
+ def raise_with_last_cause(error)
103
+ raise @last_error
104
+ rescue StandardError
105
+ raise error
106
+ end
107
+ end
108
+ end
@@ -60,18 +60,6 @@ module Utility
60
60
  class JobDocumentLimitError < StandardError; end
61
61
  class JobClaimingError < StandardError; end
62
62
 
63
- class MonitoringError < StandardError
64
- attr_accessor :tripped_by
65
-
66
- def initialize(message = nil, tripped_by: nil)
67
- super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
68
- @tripped_by = tripped_by
69
- end
70
- end
71
- class MaxSuccessiveErrorsExceededError < MonitoringError; end
72
- class MaxErrorsExceededError < MonitoringError; end
73
- class MaxErrorsInWindowExceededError < MonitoringError; end
74
-
75
63
  class JobSyncNotPossibleYetError < StandardError
76
64
  attr_accessor :sync_will_be_possible_at
77
65
 
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Utility
10
+ class Filtering
11
+ class << self
12
+ def extract_filter(filtering)
13
+ return {} unless filtering.present?
14
+
15
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
16
+ filter = filtering.is_a?(Array) ? filtering.first : filtering
17
+
18
+ filter.present? ? filter : {}
19
+ end
20
+ end
21
+ end
22
+ end
@@ -23,7 +23,7 @@ module Utility
23
23
  end
24
24
 
25
25
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
26
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
27
  end
28
28
 
29
29
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,21 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
9
+ require 'utility/bulk_queue'
10
+ require 'utility/common'
7
11
  require 'utility/constants'
8
12
  require 'utility/cron'
9
- require 'utility/common'
13
+ require 'utility/elasticsearch/index/mappings'
14
+ require 'utility/elasticsearch/index/text_analysis_settings'
15
+ require 'utility/environment'
16
+ require 'utility/error_monitor'
10
17
  require 'utility/errors'
18
+ require 'utility/filtering'
11
19
  require 'utility/es_client'
12
- require 'utility/environment'
13
20
  require 'utility/exception_tracking'
14
21
  require 'utility/extension_mapping_util'
15
22
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221104T200814Z
4
+ version: 8.6.0.4.pre.20221116T024501Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-04 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -396,34 +396,49 @@ files:
396
396
  - lib/app/version.rb
397
397
  - lib/connectors.rb
398
398
  - lib/connectors/base/adapter.rb
399
+ - lib/connectors/base/advanced_snippet_against_schema_validator.rb
400
+ - lib/connectors/base/advanced_snippet_validator.rb
399
401
  - lib/connectors/base/connector.rb
400
402
  - lib/connectors/base/custom_client.rb
403
+ - lib/connectors/base/simple_rules_parser.rb
401
404
  - lib/connectors/connector_status.rb
402
405
  - lib/connectors/crawler/scheduler.rb
403
406
  - lib/connectors/example/attachments/first_attachment.txt
404
407
  - lib/connectors/example/attachments/second_attachment.txt
405
408
  - lib/connectors/example/attachments/third_attachment.txt
406
409
  - lib/connectors/example/connector.rb
410
+ - lib/connectors/example/example_advanced_snippet_validator.rb
407
411
  - lib/connectors/gitlab/adapter.rb
408
412
  - lib/connectors/gitlab/connector.rb
409
413
  - lib/connectors/gitlab/custom_client.rb
410
414
  - lib/connectors/gitlab/extractor.rb
415
+ - lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
411
416
  - lib/connectors/mongodb/connector.rb
417
+ - lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
418
+ - lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
419
+ - lib/connectors/mongodb/mongo_rules_parser.rb
412
420
  - lib/connectors/registry.rb
413
421
  - lib/connectors/sync_status.rb
422
+ - lib/connectors/tolerable_error_helper.rb
414
423
  - lib/connectors_service.rb
415
424
  - lib/connectors_utility.rb
416
425
  - lib/core.rb
417
426
  - lib/core/configuration.rb
427
+ - lib/core/connector_job.rb
418
428
  - lib/core/connector_settings.rb
419
429
  - lib/core/elastic_connector_actions.rb
430
+ - lib/core/filtering.rb
431
+ - lib/core/filtering/post_process_engine.rb
432
+ - lib/core/filtering/post_process_result.rb
433
+ - lib/core/filtering/simple_rule.rb
434
+ - lib/core/filtering/validation_job_runner.rb
435
+ - lib/core/filtering/validation_status.rb
420
436
  - lib/core/heartbeat.rb
437
+ - lib/core/ingestion.rb
438
+ - lib/core/ingestion/es_sink.rb
439
+ - lib/core/jobs/consumer.rb
440
+ - lib/core/jobs/producer.rb
421
441
  - lib/core/native_scheduler.rb
422
- - lib/core/output_sink.rb
423
- - lib/core/output_sink/base_sink.rb
424
- - lib/core/output_sink/combined_sink.rb
425
- - lib/core/output_sink/console_sink.rb
426
- - lib/core/output_sink/es_sink.rb
427
442
  - lib/core/scheduler.rb
428
443
  - lib/core/single_scheduler.rb
429
444
  - lib/core/sync_job_runner.rb
@@ -432,6 +447,7 @@ files:
432
447
  - lib/stubs/connectors/stats.rb
433
448
  - lib/stubs/service_type.rb
434
449
  - lib/utility.rb
450
+ - lib/utility/bulk_queue.rb
435
451
  - lib/utility/common.rb
436
452
  - lib/utility/constants.rb
437
453
  - lib/utility/cron.rb
@@ -439,10 +455,12 @@ files:
439
455
  - lib/utility/elasticsearch/index/mappings.rb
440
456
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
441
457
  - lib/utility/environment.rb
458
+ - lib/utility/error_monitor.rb
442
459
  - lib/utility/errors.rb
443
460
  - lib/utility/es_client.rb
444
461
  - lib/utility/exception_tracking.rb
445
462
  - lib/utility/extension_mapping_util.rb
463
+ - lib/utility/filtering.rb
446
464
  - lib/utility/logger.rb
447
465
  - lib/utility/middleware/basic_auth.rb
448
466
  - lib/utility/middleware/bearer_auth.rb
@@ -1,33 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Core
10
- module OutputSink
11
- class BaseSink
12
- def ingest(_document)
13
- raise 'not implemented'
14
- end
15
-
16
- def ingest_multiple(_documents)
17
- raise 'not implemented'
18
- end
19
-
20
- def delete(_id)
21
- raise 'not implemented'
22
- end
23
-
24
- def delete_multiple(_ids)
25
- raise 'not implemented'
26
- end
27
-
28
- def flush(_size: nil)
29
- raise 'not implemented'
30
- end
31
- end
32
- end
33
- end
@@ -1,38 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink/base_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class CombinedSink < Core::OutputSink::BaseSink
14
- def initialize(sinks = [])
15
- @sinks = sinks
16
- end
17
-
18
- def ingest(document)
19
- @sinks.each { |sink| sink.ingest(document) }
20
- end
21
-
22
- def flush(size: nil)
23
- @sinks.each { |sink| sink.flush(size: size) }
24
- end
25
-
26
- def ingest_multiple(documents)
27
- @sinks.each { |sink| sink.ingest_multiple(documents) }
28
- end
29
-
30
- def delete(id)
31
- @sinks.each { |sink| sink.delete(id) }
32
- end
33
-
34
- def delete_multiple(ids)
35
- @sinks.each { |sink| sink.delete_multiple(ids) }
36
- end
37
- end
38
- end
@@ -1,51 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class ConsoleSink < Core::OutputSink::BaseSink
14
- def ingest(document)
15
- print_header 'Got a single document:'
16
- puts document
17
- end
18
-
19
- def flush(size: nil)
20
- print_header 'Flushing'
21
- puts "Flush size: #{size}"
22
- end
23
-
24
- def ingest_multiple(documents)
25
- print_header 'Got multiple documents:'
26
- puts documents
27
- end
28
-
29
- def delete(id)
30
- print_header "Deleting single id: #{id}"
31
- puts id
32
- end
33
-
34
- def delete_multiple(ids)
35
- print_header "Deleting several ids: #{ids}"
36
- puts ids
37
- end
38
-
39
- private
40
-
41
- def print_delim
42
- puts '----------------------------------------------------'
43
- end
44
-
45
- def print_header(header)
46
- print_delim
47
- puts header
48
- print_delim
49
- end
50
- end
51
- end