connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/dispatcher.rb +42 -17
  5. data/lib/app/preflight_check.rb +11 -0
  6. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  7. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  8. data/lib/connectors/base/connector.rb +43 -14
  9. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  10. data/lib/connectors/example/connector.rb +6 -0
  11. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  12. data/lib/connectors/gitlab/connector.rb +6 -1
  13. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  14. data/lib/connectors/mongodb/connector.rb +47 -43
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  17. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  18. data/lib/connectors/sync_status.rb +6 -1
  19. data/lib/connectors/tolerable_error_helper.rb +43 -0
  20. data/lib/core/configuration.rb +3 -1
  21. data/lib/core/connector_job.rb +210 -0
  22. data/lib/core/connector_settings.rb +52 -16
  23. data/lib/core/elastic_connector_actions.rb +320 -59
  24. data/lib/core/filtering/post_process_engine.rb +39 -0
  25. data/lib/core/filtering/post_process_result.rb +27 -0
  26. data/lib/core/filtering/simple_rule.rb +141 -0
  27. data/lib/core/filtering/validation_job_runner.rb +53 -0
  28. data/lib/core/filtering/validation_status.rb +17 -0
  29. data/lib/core/filtering.rb +17 -0
  30. data/lib/core/ingestion/es_sink.rb +118 -0
  31. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  32. data/lib/core/jobs/consumer.rb +114 -0
  33. data/lib/core/jobs/producer.rb +26 -0
  34. data/lib/core/scheduler.rb +40 -10
  35. data/lib/core/single_scheduler.rb +1 -1
  36. data/lib/core/sync_job_runner.rb +72 -16
  37. data/lib/core.rb +4 -0
  38. data/lib/utility/bulk_queue.rb +85 -0
  39. data/lib/utility/constants.rb +2 -0
  40. data/lib/utility/error_monitor.rb +108 -0
  41. data/lib/utility/errors.rb +0 -12
  42. data/lib/utility/filtering.rb +22 -0
  43. data/lib/utility/logger.rb +1 -1
  44. data/lib/utility.rb +11 -4
  45. metadata +25 -7
  46. data/lib/core/output_sink/base_sink.rb +0 -33
  47. data/lib/core/output_sink/combined_sink.rb +0 -38
  48. data/lib/core/output_sink/console_sink.rb +0 -51
  49. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,16 +21,21 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
22
- def initialize(connector_settings)
24
+ JOB_REPORTING_INTERVAL = 10
25
+
26
+ def initialize(connector_settings, job)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @sink = Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
25
29
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
30
  @sync_finished = false
31
+ @sync_error = nil
27
32
  @status = {
28
33
  :indexed_document_count => 0,
29
34
  :deleted_document_count => 0,
35
+ :indexed_document_volume => 0,
30
36
  :error => nil
31
37
  }
38
+ @job = job
32
39
  end
33
40
 
34
41
  def execute
@@ -41,8 +48,17 @@ module Core
41
48
  def do_sync!
42
49
  Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
50
 
44
- job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
45
- job_id = job_description['_id']
51
+ # connector service doesn't support multiple jobs running simultaneously
52
+ raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
53
+
54
+ Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
55
+
56
+ # claim the job
57
+ @job.make_running!
58
+
59
+ job_description = @job.es_source
60
+ job_id = @job.id
61
+ job_description['_id'] = job_id
46
62
 
47
63
  unless job_id.present?
48
64
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +68,10 @@ module Core
52
68
  begin
53
69
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
54
70
 
71
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
72
+ validate_filtering(job_description.dig(:connector, :filtering))
73
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
74
+
55
75
  connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
56
76
 
57
77
  connector_instance.do_health_check!
@@ -61,11 +81,21 @@ module Core
61
81
 
62
82
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
63
83
 
84
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
85
+ reporting_cycle_start = Time.now
86
+ Utility::Logger.info('Yielding documents...')
64
87
  connector_instance.yield_documents do |document|
65
88
  document = add_ingest_metadata(document)
66
- @sink.ingest(document)
67
- incoming_ids << document['id']
68
- @status[:indexed_document_count] += 1
89
+ post_process_result = post_processing_engine.process(document)
90
+ if post_process_result.is_include?
91
+ @sink.ingest(document)
92
+ incoming_ids << document['id']
93
+ end
94
+
95
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
96
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
97
+ reporting_cycle_start = Time.now
98
+ end
69
99
  end
70
100
 
71
101
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -74,7 +104,11 @@ module Core
74
104
 
75
105
  ids_to_delete.each do |id|
76
106
  @sink.delete(id)
77
- @status[:deleted_document_count] += 1
107
+
108
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
109
+ ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
110
+ reporting_cycle_start = Time.now
111
+ end
78
112
  end
79
113
 
80
114
  @sink.flush
@@ -83,22 +117,34 @@ module Core
83
117
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
84
118
  @sync_finished = true
85
119
  rescue StandardError => e
86
- @status[:error] = e.message
120
+ @sync_error = e.message
87
121
  Utility::ExceptionTracking.log_exception(e)
88
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
89
122
  ensure
123
+ stats = @sink.ingestion_stats
124
+
125
+ Utility::Logger.debug("Sync stats are: #{stats}")
126
+
127
+ @status[:indexed_document_count] = stats[:indexed_document_count]
128
+ @status[:deleted_document_count] = stats[:deleted_document_count]
129
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
130
+
90
131
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
91
132
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
92
133
 
93
134
  # Make sure to not override a previous error message
94
- if !@sync_finished && @status[:error].nil?
95
- @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
135
+ if !@sync_finished && @sync_error.nil?
136
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
137
+ end
138
+
139
+ unless connector_instance.nil?
140
+ metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
141
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
96
142
  end
97
143
 
98
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
144
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
99
145
 
100
- if @status[:error]
101
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
146
+ if @sync_error
147
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
102
148
  else
103
149
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
104
150
  end
@@ -119,5 +165,15 @@ module Core
119
165
 
120
166
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
121
167
  end
168
+
169
+ def validate_filtering(filtering)
170
+ validation_result = @connector_class.validate_filtering(filtering)
171
+
172
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
173
+ raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
174
+
175
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
176
+ raise errors_present_error if validation_result[:errors].present?
177
+ end
122
178
  end
123
179
  end
data/lib/core.rb CHANGED
@@ -7,10 +7,14 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
15
17
  require 'core/native_scheduler'
16
18
  require 'core/sync_job_runner'
19
+ require 'core/jobs/producer'
20
+ require 'core/jobs/consumer'
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
module Utility
  # Accumulates Elasticsearch bulk-API operations as NDJSON text until either
  # an operation-count threshold or a byte-size threshold would be crossed.
  class BulkQueue
    # Raised by #add when the operation would not fit; callers should consult
    # #will_fit? (or rescue this) and drain the queue via #pop_all first.
    class QueueOverflowError < StandardError; end

    # Defaults: at most 500 operations or 5 MiB of serialized data per batch.
    # @param operation_count_threshold [Integer] maximum operations per batch
    # @param size_threshold [Integer] maximum buffered bytes per batch
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold
      reset
    end

    # Returns the accumulated NDJSON payload and empties the queue.
    # @return [String] buffered operations ("" when the queue is empty)
    def pop_all
      drained = @buffer
      reset
      drained
    end

    # Appends one operation line (and an optional payload line) to the buffer.
    # @param operation [String] serialized bulk action line
    # @param payload [String, nil] serialized document source line
    # @raise [QueueOverflowError] when adding would exceed a threshold
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      op_bytes = get_size(operation)
      payload_bytes = get_size(payload)

      @current_operation_count += 1
      @current_buffer_size += op_bytes + payload_bytes
      @current_data_size += payload_bytes

      @buffer << operation << "\n"
      @buffer << payload << "\n" if payload
    end

    # True when the operation (plus payload) can be appended without crossing
    # either the operation-count or the byte-size threshold.
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      projected_size = @current_buffer_size + get_size(operation) + get_size(payload)
      projected_size < @size_threshold
    end

    # Snapshot of current queue occupancy.
    # @return [Hash] operation count and buffered byte size
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte length of +str+, with nil counted as zero bytes.
    def get_size(str)
      str ? str.bytesize : 0
    end

    # Clears the buffer and all counters.
    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0
      @buffer = ''
    end
  end
end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end
@@ -0,0 +1,108 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'utility/errors'
11
+ require 'utility/exception_tracking'
12
+
13
module Utility
  # Tracks success/error outcomes of a long-running ingestion job: counts
  # totals, keeps a sliding window of recent outcomes and a bounded queue of
  # recent error details, and raises a MonitoringError subclass as soon as an
  # error budget (consecutive, total, or windowed ratio) is exhausted.
  class ErrorMonitor
    # Base monitoring failure; carries the underlying exception (+tripped_by+)
    # that pushed the monitor over one of its limits.
    class MonitoringError < StandardError
      attr_accessor :tripped_by

      def initialize(message = nil, tripped_by: nil)
        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors [Integer] absolute cap on total errors
    # @param max_consecutive_errors [Integer] cap on back-to-back errors
    # @param max_error_ratio [Float] maximum tolerated error ratio
    # @param window_size [Integer] sliding-window length for the ratio check;
    #   0 disables window tracking (see the guard in #raise_if_necessary)
    # @param error_queue_size [Integer] number of recent errors retained
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records one successfully processed document and resets the
    # consecutive-error streak.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      increment_window_index
    end

    # Records one failed document and raises if any error budget is exceeded.
    # @param error [StandardError] the exception raised for this document
    # @param id [Object] identifier used when logging the failure
    # @raise [MonitoringError] via #raise_if_necessary
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      # Guard: with window tracking disabled (size 0) there is no slot to mark.
      @window_errors[@window_index] = true unless @window_size.zero?
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      # Keep only the most recent +error_queue_size+ errors.
      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
      increment_window_index
      @last_error = error

      raise_if_necessary
    end

    # Job-wide ratio check, intended to be called once ingestion finishes.
    # @raise [MaxErrorsInWindowExceededError] when the overall error ratio
    #   exceeded +max_error_ratio+
    def finalize
      total_documents = @total_error_count + @success_count
      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
      end
    end

    private

    # Raises the first applicable limit violation, if any.
    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # Number of errored slots in the sliding window, as a Float so the ratio
    # division in #raise_if_necessary does not truncate.
    def num_errors_in_window
      @window_errors.count(&:itself).to_f
    end

    # Advances the circular window cursor.
    def increment_window_index
      # Bug fix: with window_size 0 (window tracking disabled — a state the
      # `@window_size > 0` guard in #raise_if_necessary explicitly supports)
      # the unguarded modulo raised ZeroDivisionError on every
      # note_success/note_error call.
      return if @window_size.zero?

      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ with the last seen exception attached as its +cause+
    # (achieved by raising and immediately rescuing the original).
    def raise_with_last_cause(error)
      raise @last_error
    rescue StandardError
      raise error
    end
  end
end
@@ -60,18 +60,6 @@ module Utility
60
60
  class JobDocumentLimitError < StandardError; end
61
61
  class JobClaimingError < StandardError; end
62
62
 
63
- class MonitoringError < StandardError
64
- attr_accessor :tripped_by
65
-
66
- def initialize(message = nil, tripped_by: nil)
67
- super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
68
- @tripped_by = tripped_by
69
- end
70
- end
71
- class MaxSuccessiveErrorsExceededError < MonitoringError; end
72
- class MaxErrorsExceededError < MonitoringError; end
73
- class MaxErrorsInWindowExceededError < MonitoringError; end
74
-
75
63
  class JobSyncNotPossibleYetError < StandardError
76
64
  attr_accessor :sync_will_be_possible_at
77
65
 
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  class Filtering
    class << self
      # Picks the single active filter out of +filtering+, which may be a
      # filter object itself or an array of filter objects.
      #
      # NOTE: for now the first element of an array is assumed to be the only
      # relevant filter.
      #
      # @param filtering [Hash, Array, nil] raw filtering payload
      # @return [Hash] the active filter, or {} when none is present
      def extract_filter(filtering)
        candidate = filtering.is_a?(Array) ? filtering.first : filtering

        # Relies on ActiveSupport's blank-aware #present? (nil/empty => {}).
        candidate.present? ? candidate : {}
      end
    end
  end
end
@@ -23,7 +23,7 @@ module Utility
23
23
  end
24
24
 
25
25
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
26
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
27
  end
28
28
 
29
29
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,21 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
9
+ require 'utility/bulk_queue'
10
+ require 'utility/common'
7
11
  require 'utility/constants'
8
12
  require 'utility/cron'
9
- require 'utility/common'
13
+ require 'utility/elasticsearch/index/mappings'
14
+ require 'utility/elasticsearch/index/text_analysis_settings'
15
+ require 'utility/environment'
16
+ require 'utility/error_monitor'
10
17
  require 'utility/errors'
18
+ require 'utility/filtering'
11
19
  require 'utility/es_client'
12
- require 'utility/environment'
13
20
  require 'utility/exception_tracking'
14
21
  require 'utility/extension_mapping_util'
15
22
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221104T200814Z
4
+ version: 8.6.0.4.pre.20221116T024501Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-04 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -396,34 +396,49 @@ files:
396
396
  - lib/app/version.rb
397
397
  - lib/connectors.rb
398
398
  - lib/connectors/base/adapter.rb
399
+ - lib/connectors/base/advanced_snippet_against_schema_validator.rb
400
+ - lib/connectors/base/advanced_snippet_validator.rb
399
401
  - lib/connectors/base/connector.rb
400
402
  - lib/connectors/base/custom_client.rb
403
+ - lib/connectors/base/simple_rules_parser.rb
401
404
  - lib/connectors/connector_status.rb
402
405
  - lib/connectors/crawler/scheduler.rb
403
406
  - lib/connectors/example/attachments/first_attachment.txt
404
407
  - lib/connectors/example/attachments/second_attachment.txt
405
408
  - lib/connectors/example/attachments/third_attachment.txt
406
409
  - lib/connectors/example/connector.rb
410
+ - lib/connectors/example/example_advanced_snippet_validator.rb
407
411
  - lib/connectors/gitlab/adapter.rb
408
412
  - lib/connectors/gitlab/connector.rb
409
413
  - lib/connectors/gitlab/custom_client.rb
410
414
  - lib/connectors/gitlab/extractor.rb
415
+ - lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
411
416
  - lib/connectors/mongodb/connector.rb
417
+ - lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
418
+ - lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
419
+ - lib/connectors/mongodb/mongo_rules_parser.rb
412
420
  - lib/connectors/registry.rb
413
421
  - lib/connectors/sync_status.rb
422
+ - lib/connectors/tolerable_error_helper.rb
414
423
  - lib/connectors_service.rb
415
424
  - lib/connectors_utility.rb
416
425
  - lib/core.rb
417
426
  - lib/core/configuration.rb
427
+ - lib/core/connector_job.rb
418
428
  - lib/core/connector_settings.rb
419
429
  - lib/core/elastic_connector_actions.rb
430
+ - lib/core/filtering.rb
431
+ - lib/core/filtering/post_process_engine.rb
432
+ - lib/core/filtering/post_process_result.rb
433
+ - lib/core/filtering/simple_rule.rb
434
+ - lib/core/filtering/validation_job_runner.rb
435
+ - lib/core/filtering/validation_status.rb
420
436
  - lib/core/heartbeat.rb
437
+ - lib/core/ingestion.rb
438
+ - lib/core/ingestion/es_sink.rb
439
+ - lib/core/jobs/consumer.rb
440
+ - lib/core/jobs/producer.rb
421
441
  - lib/core/native_scheduler.rb
422
- - lib/core/output_sink.rb
423
- - lib/core/output_sink/base_sink.rb
424
- - lib/core/output_sink/combined_sink.rb
425
- - lib/core/output_sink/console_sink.rb
426
- - lib/core/output_sink/es_sink.rb
427
442
  - lib/core/scheduler.rb
428
443
  - lib/core/single_scheduler.rb
429
444
  - lib/core/sync_job_runner.rb
@@ -432,6 +447,7 @@ files:
432
447
  - lib/stubs/connectors/stats.rb
433
448
  - lib/stubs/service_type.rb
434
449
  - lib/utility.rb
450
+ - lib/utility/bulk_queue.rb
435
451
  - lib/utility/common.rb
436
452
  - lib/utility/constants.rb
437
453
  - lib/utility/cron.rb
@@ -439,10 +455,12 @@ files:
439
455
  - lib/utility/elasticsearch/index/mappings.rb
440
456
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
441
457
  - lib/utility/environment.rb
458
+ - lib/utility/error_monitor.rb
442
459
  - lib/utility/errors.rb
443
460
  - lib/utility/es_client.rb
444
461
  - lib/utility/exception_tracking.rb
445
462
  - lib/utility/extension_mapping_util.rb
463
+ - lib/utility/filtering.rb
446
464
  - lib/utility/logger.rb
447
465
  - lib/utility/middleware/basic_auth.rb
448
466
  - lib/utility/middleware/bearer_auth.rb
@@ -1,33 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
module Core
  module OutputSink
    # Abstract interface for document sinks. Every operation raises until a
    # concrete subclass overrides it.
    class BaseSink
      # Ingests a single document.
      def ingest(_document)
        raise 'not implemented'
      end

      # Deletes the document with the given id.
      def delete(_id)
        raise 'not implemented'
      end

      # Ingests a batch of documents.
      def ingest_multiple(_documents)
        raise 'not implemented'
      end

      # Deletes all documents with the given ids.
      def delete_multiple(_ids)
        raise 'not implemented'
      end

      # Flushes buffered operations; +size+ may hint at the batch size.
      def flush(_size: nil)
        raise 'not implemented'
      end
    end
  end
end
@@ -1,38 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink/base_sink'
10
- require 'utility/logger'
11
-
12
module Core::OutputSink
  # Fan-out sink: forwards every operation, unchanged, to each wrapped sink.
  class CombinedSink < Core::OutputSink::BaseSink
    # @param sinks [Array<BaseSink>] sinks to broadcast operations to
    def initialize(sinks = [])
      @sinks = sinks
    end

    # Forwards a single document to every sink.
    def ingest(document)
      broadcast(:ingest, document)
    end

    # Asks every sink to flush, passing the size hint through.
    def flush(size: nil)
      @sinks.each { |sink| sink.flush(size: size) }
    end

    # Forwards a batch of documents to every sink.
    def ingest_multiple(documents)
      broadcast(:ingest_multiple, documents)
    end

    # Forwards a single deletion to every sink.
    def delete(id)
      broadcast(:delete, id)
    end

    # Forwards a batch of deletions to every sink.
    def delete_multiple(ids)
      broadcast(:delete_multiple, ids)
    end

    private

    # Invokes the public +operation+ with +arg+ on every wrapped sink.
    def broadcast(operation, arg)
      @sinks.each { |sink| sink.public_send(operation, arg) }
    end
  end
end
@@ -1,51 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink'
10
- require 'utility/logger'
11
-
12
module Core::OutputSink
  # Debug sink: writes every operation to STDOUT instead of indexing anything.
  class ConsoleSink < Core::OutputSink::BaseSink
    # Prints a single ingested document.
    def ingest(document)
      banner 'Got a single document:'
      puts document
    end

    # Announces a flush together with the requested flush size.
    def flush(size: nil)
      banner 'Flushing'
      puts "Flush size: #{size}"
    end

    # Prints a batch of ingested documents.
    def ingest_multiple(documents)
      banner 'Got multiple documents:'
      puts documents
    end

    # Prints a single deleted id.
    def delete(id)
      banner "Deleting single id: #{id}"
      puts id
    end

    # Prints a batch of deleted ids.
    def delete_multiple(ids)
      banner "Deleting several ids: #{ids}"
      puts ids
    end

    private

    # Prints +header+ between two delimiter lines.
    def banner(header)
      divider
      puts header
      divider
    end

    # Prints a horizontal separator line.
    def divider
      puts '----------------------------------------------------'
    end
  end
end