connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T004928Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -8
- data/lib/app/app.rb +4 -0
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +44 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +132 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +80 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +87 -0
- data/lib/utility/constants.rb +7 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +31 -12
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
-
require 'core/output_sink'
|
11
|
+
require 'core/filtering/post_process_engine'
|
12
|
+
require 'core/ingestion'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility'
|
13
15
|
|
14
16
|
module Core
|
@@ -19,16 +21,29 @@ module Core
|
|
19
21
|
end
|
20
22
|
|
21
23
|
class SyncJobRunner
|
22
|
-
|
24
|
+
JOB_REPORTING_INTERVAL = 10
|
25
|
+
|
26
|
+
def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
|
23
27
|
@connector_settings = connector_settings
|
24
|
-
@sink = Core::
|
28
|
+
@sink = Core::Ingestion::EsSink.new(
|
29
|
+
connector_settings.index_name,
|
30
|
+
@connector_settings.request_pipeline,
|
31
|
+
Utility::BulkQueue.new(
|
32
|
+
max_ingestion_queue_size,
|
33
|
+
max_ingestion_queue_bytes
|
34
|
+
),
|
35
|
+
max_ingestion_queue_bytes
|
36
|
+
)
|
25
37
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
26
38
|
@sync_finished = false
|
39
|
+
@sync_error = nil
|
27
40
|
@status = {
|
28
41
|
:indexed_document_count => 0,
|
29
42
|
:deleted_document_count => 0,
|
43
|
+
:indexed_document_volume => 0,
|
30
44
|
:error => nil
|
31
45
|
}
|
46
|
+
@job = job
|
32
47
|
end
|
33
48
|
|
34
49
|
def execute
|
@@ -41,8 +56,17 @@ module Core
|
|
41
56
|
def do_sync!
|
42
57
|
Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
|
43
58
|
|
44
|
-
|
45
|
-
|
59
|
+
# connector service doesn't support multiple jobs running simultaneously
|
60
|
+
raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
|
61
|
+
|
62
|
+
Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
|
63
|
+
|
64
|
+
# claim the job
|
65
|
+
@job.make_running!
|
66
|
+
|
67
|
+
job_description = @job.es_source
|
68
|
+
job_id = @job.id
|
69
|
+
job_description['_id'] = job_id
|
46
70
|
|
47
71
|
unless job_id.present?
|
48
72
|
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
@@ -52,6 +76,10 @@ module Core
|
|
52
76
|
begin
|
53
77
|
Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
|
54
78
|
|
79
|
+
Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
|
80
|
+
validate_filtering(job_description.dig(:connector, :filtering))
|
81
|
+
Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
|
82
|
+
|
55
83
|
connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
|
56
84
|
|
57
85
|
connector_instance.do_health_check!
|
@@ -61,11 +89,21 @@ module Core
|
|
61
89
|
|
62
90
|
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
63
91
|
|
92
|
+
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
93
|
+
reporting_cycle_start = Time.now
|
94
|
+
Utility::Logger.info('Yielding documents...')
|
64
95
|
connector_instance.yield_documents do |document|
|
65
96
|
document = add_ingest_metadata(document)
|
66
|
-
|
67
|
-
|
68
|
-
|
97
|
+
post_process_result = post_processing_engine.process(document)
|
98
|
+
if post_process_result.is_include?
|
99
|
+
@sink.ingest(document)
|
100
|
+
incoming_ids << document['id']
|
101
|
+
end
|
102
|
+
|
103
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
104
|
+
ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
105
|
+
reporting_cycle_start = Time.now
|
106
|
+
end
|
69
107
|
end
|
70
108
|
|
71
109
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
@@ -74,7 +112,11 @@ module Core
|
|
74
112
|
|
75
113
|
ids_to_delete.each do |id|
|
76
114
|
@sink.delete(id)
|
77
|
-
|
115
|
+
|
116
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
117
|
+
ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
118
|
+
reporting_cycle_start = Time.now
|
119
|
+
end
|
78
120
|
end
|
79
121
|
|
80
122
|
@sink.flush
|
@@ -83,22 +125,34 @@ module Core
|
|
83
125
|
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
84
126
|
@sync_finished = true
|
85
127
|
rescue StandardError => e
|
86
|
-
@status[:error] = e.message
|
128
|
+
@sync_error = e.message
|
87
129
|
Utility::ExceptionTracking.log_exception(e)
|
88
|
-
ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
|
89
130
|
ensure
|
131
|
+
stats = @sink.ingestion_stats
|
132
|
+
|
133
|
+
Utility::Logger.debug("Sync stats are: #{stats}")
|
134
|
+
|
135
|
+
@status[:indexed_document_count] = stats[:indexed_document_count]
|
136
|
+
@status[:deleted_document_count] = stats[:deleted_document_count]
|
137
|
+
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
138
|
+
|
90
139
|
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
91
140
|
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
92
141
|
|
93
142
|
# Make sure to not override a previous error message
|
94
|
-
if !@sync_finished && @status[:error].nil?
|
95
|
-
@status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
143
|
+
if !@sync_finished && @sync_error.nil?
|
144
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
145
|
+
end
|
146
|
+
|
147
|
+
unless connector_instance.nil?
|
148
|
+
metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
|
149
|
+
metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
|
96
150
|
end
|
97
151
|
|
98
|
-
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @
|
152
|
+
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
|
99
153
|
|
100
|
-
if @status[:error]
|
101
|
-
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
|
154
|
+
if @sync_error
|
155
|
+
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
|
102
156
|
else
|
103
157
|
Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
|
104
158
|
end
|
@@ -119,5 +173,15 @@ module Core
|
|
119
173
|
|
120
174
|
raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
121
175
|
end
|
176
|
+
|
177
|
+
def validate_filtering(filtering)
|
178
|
+
validation_result = @connector_class.validate_filtering(filtering)
|
179
|
+
|
180
|
+
wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
181
|
+
raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
|
182
|
+
|
183
|
+
errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
184
|
+
raise errors_present_error if validation_result[:errors].present?
|
185
|
+
end
|
122
186
|
end
|
123
187
|
end
|
data/lib/core.rb
CHANGED
@@ -7,10 +7,14 @@
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
9
|
require 'core/configuration'
|
10
|
+
require 'core/connector_job'
|
10
11
|
require 'core/connector_settings'
|
11
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering'
|
12
14
|
require 'core/heartbeat'
|
13
15
|
require 'core/scheduler'
|
14
16
|
require 'core/single_scheduler'
|
15
17
|
require 'core/native_scheduler'
|
16
18
|
require 'core/sync_job_runner'
|
19
|
+
require 'core/jobs/producer'
|
20
|
+
require 'core/jobs/consumer'
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
require 'utility/constants'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
class BulkQueue
|
13
|
+
class QueueOverflowError < StandardError; end
|
14
|
+
|
15
|
+
# 500 items or 5MB
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
17
|
+
@operation_count_threshold = operation_count_threshold.freeze
|
18
|
+
@size_threshold = size_threshold.freeze
|
19
|
+
|
20
|
+
@buffer = ''
|
21
|
+
|
22
|
+
@current_operation_count = 0
|
23
|
+
|
24
|
+
@current_buffer_size = 0
|
25
|
+
@current_data_size = 0
|
26
|
+
end
|
27
|
+
|
28
|
+
def pop_all
|
29
|
+
result = @buffer
|
30
|
+
|
31
|
+
reset
|
32
|
+
|
33
|
+
result
|
34
|
+
end
|
35
|
+
|
36
|
+
def add(operation, payload = nil)
|
37
|
+
raise QueueOverflowError unless will_fit?(operation, payload)
|
38
|
+
|
39
|
+
operation_size = get_size(operation)
|
40
|
+
payload_size = get_size(payload)
|
41
|
+
|
42
|
+
@current_operation_count += 1
|
43
|
+
@current_buffer_size += operation_size
|
44
|
+
@current_buffer_size += payload_size
|
45
|
+
@current_data_size += payload_size
|
46
|
+
|
47
|
+
@buffer << operation
|
48
|
+
@buffer << "\n"
|
49
|
+
|
50
|
+
if payload
|
51
|
+
@buffer << payload
|
52
|
+
@buffer << "\n"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def will_fit?(operation, payload = nil)
|
57
|
+
return false if @current_operation_count + 1 > @operation_count_threshold
|
58
|
+
|
59
|
+
operation_size = get_size(operation)
|
60
|
+
payload_size = get_size(payload)
|
61
|
+
|
62
|
+
@current_buffer_size + operation_size + payload_size < @size_threshold
|
63
|
+
end
|
64
|
+
|
65
|
+
def current_stats
|
66
|
+
{
|
67
|
+
:current_operation_count => @current_operation_count,
|
68
|
+
:current_buffer_size => @current_buffer_size
|
69
|
+
}
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
def get_size(str)
|
75
|
+
return 0 unless str
|
76
|
+
str.bytesize
|
77
|
+
end
|
78
|
+
|
79
|
+
def reset
|
80
|
+
@current_operation_count = 0
|
81
|
+
@current_buffer_size = 0
|
82
|
+
@current_data_size = 0
|
83
|
+
|
84
|
+
@buffer = ''
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/utility/constants.rb
CHANGED
@@ -16,5 +16,12 @@ module Utility
|
|
16
16
|
JOB_INDEX = '.elastic-connectors-sync-jobs'
|
17
17
|
CONTENT_INDEX_PREFIX = 'search-'
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
|
+
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
|
+
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
19
26
|
end
|
20
27
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'utility/errors'
|
11
|
+
require 'utility/exception_tracking'
|
12
|
+
|
13
|
+
module Utility
|
14
|
+
class ErrorMonitor
|
15
|
+
class MonitoringError < StandardError
|
16
|
+
attr_accessor :tripped_by
|
17
|
+
|
18
|
+
def initialize(message = nil, tripped_by: nil)
|
19
|
+
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
20
|
+
@tripped_by = tripped_by
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
25
|
+
class MaxErrorsExceededError < MonitoringError; end
|
26
|
+
class MaxErrorsInWindowExceededError < MonitoringError; end
|
27
|
+
|
28
|
+
attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
|
29
|
+
|
30
|
+
def initialize(
|
31
|
+
max_errors: 1000,
|
32
|
+
max_consecutive_errors: 10,
|
33
|
+
max_error_ratio: 0.15,
|
34
|
+
window_size: 100,
|
35
|
+
error_queue_size: 20
|
36
|
+
)
|
37
|
+
@max_errors = max_errors
|
38
|
+
@max_consecutive_errors = max_consecutive_errors
|
39
|
+
@max_error_ratio = max_error_ratio
|
40
|
+
@window_size = window_size
|
41
|
+
@total_error_count = 0
|
42
|
+
@success_count = 0
|
43
|
+
@consecutive_error_count = 0
|
44
|
+
@window_errors = Array.new(window_size) { false }
|
45
|
+
@window_index = 0
|
46
|
+
@last_error = nil
|
47
|
+
@error_queue_size = error_queue_size
|
48
|
+
@error_queue = []
|
49
|
+
end
|
50
|
+
|
51
|
+
def note_success
|
52
|
+
@consecutive_error_count = 0
|
53
|
+
@success_count += 1
|
54
|
+
increment_window_index
|
55
|
+
end
|
56
|
+
|
57
|
+
def note_error(error, id: Time.now.to_i)
|
58
|
+
stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
|
59
|
+
error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
|
60
|
+
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
|
+
@total_error_count += 1
|
62
|
+
@consecutive_error_count += 1
|
63
|
+
@window_errors[@window_index] = true
|
64
|
+
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
65
|
+
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
66
|
+
increment_window_index
|
67
|
+
@last_error = error
|
68
|
+
|
69
|
+
raise_if_necessary
|
70
|
+
end
|
71
|
+
|
72
|
+
def finalize
|
73
|
+
total_documents = @total_error_count + @success_count
|
74
|
+
if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
|
75
|
+
raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def raise_if_necessary
|
82
|
+
error =
|
83
|
+
if @consecutive_error_count > @max_consecutive_errors
|
84
|
+
MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
|
85
|
+
elsif @total_error_count > @max_errors
|
86
|
+
MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
|
87
|
+
elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
|
88
|
+
MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
|
89
|
+
end
|
90
|
+
|
91
|
+
raise_with_last_cause(error) if error
|
92
|
+
end
|
93
|
+
|
94
|
+
def num_errors_in_window
|
95
|
+
@window_errors.count(&:itself).to_f
|
96
|
+
end
|
97
|
+
|
98
|
+
def increment_window_index
|
99
|
+
@window_index = (@window_index + 1) % @window_size
|
100
|
+
end
|
101
|
+
|
102
|
+
def raise_with_last_cause(error)
|
103
|
+
raise @last_error
|
104
|
+
rescue StandardError
|
105
|
+
raise error
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/lib/utility/errors.rb
CHANGED
@@ -60,18 +60,6 @@ module Utility
|
|
60
60
|
class JobDocumentLimitError < StandardError; end
|
61
61
|
class JobClaimingError < StandardError; end
|
62
62
|
|
63
|
-
class MonitoringError < StandardError
|
64
|
-
attr_accessor :tripped_by
|
65
|
-
|
66
|
-
def initialize(message = nil, tripped_by: nil)
|
67
|
-
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
68
|
-
@tripped_by = tripped_by
|
69
|
-
end
|
70
|
-
end
|
71
|
-
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
72
|
-
class MaxErrorsExceededError < MonitoringError; end
|
73
|
-
class MaxErrorsInWindowExceededError < MonitoringError; end
|
74
|
-
|
75
63
|
class JobSyncNotPossibleYetError < StandardError
|
76
64
|
attr_accessor :sync_will_be_possible_at
|
77
65
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
class Filtering
|
11
|
+
class << self
|
12
|
+
def extract_filter(filtering)
|
13
|
+
return {} unless filtering.present?
|
14
|
+
|
15
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
16
|
+
filter = filtering.is_a?(Array) ? filtering.first : filtering
|
17
|
+
|
18
|
+
filter.present? ? filter : {}
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/utility/logger.rb
CHANGED
@@ -23,7 +23,7 @@ module Utility
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
26
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
27
|
end
|
28
28
|
|
29
29
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,21 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
# !!!!!!!!
|
8
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
9
|
+
require 'utility/bulk_queue'
|
10
|
+
require 'utility/common'
|
7
11
|
require 'utility/constants'
|
8
12
|
require 'utility/cron'
|
9
|
-
require 'utility/
|
13
|
+
require 'utility/elasticsearch/index/mappings'
|
14
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
15
|
+
require 'utility/environment'
|
16
|
+
require 'utility/error_monitor'
|
10
17
|
require 'utility/errors'
|
18
|
+
require 'utility/filtering'
|
11
19
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
20
|
require 'utility/exception_tracking'
|
14
21
|
require 'utility/extension_mapping_util'
|
15
22
|
require 'utility/logger'
|
16
|
-
|
17
|
-
|
23
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
24
|
+
# !!!!!!!!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4
|
4
|
+
version: 8.7.0.0.pre.20221117T004928Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -396,34 +396,50 @@ files:
|
|
396
396
|
- lib/app/version.rb
|
397
397
|
- lib/connectors.rb
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
|
+
- lib/connectors/base/advanced_snippet_against_schema_validator.rb
|
400
|
+
- lib/connectors/base/advanced_snippet_validator.rb
|
399
401
|
- lib/connectors/base/connector.rb
|
400
402
|
- lib/connectors/base/custom_client.rb
|
403
|
+
- lib/connectors/base/simple_rules_parser.rb
|
401
404
|
- lib/connectors/connector_status.rb
|
402
405
|
- lib/connectors/crawler/scheduler.rb
|
403
406
|
- lib/connectors/example/attachments/first_attachment.txt
|
404
407
|
- lib/connectors/example/attachments/second_attachment.txt
|
405
408
|
- lib/connectors/example/attachments/third_attachment.txt
|
406
409
|
- lib/connectors/example/connector.rb
|
410
|
+
- lib/connectors/example/example_advanced_snippet_validator.rb
|
407
411
|
- lib/connectors/gitlab/adapter.rb
|
408
412
|
- lib/connectors/gitlab/connector.rb
|
409
413
|
- lib/connectors/gitlab/custom_client.rb
|
410
414
|
- lib/connectors/gitlab/extractor.rb
|
415
|
+
- lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
|
411
416
|
- lib/connectors/mongodb/connector.rb
|
417
|
+
- lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
|
418
|
+
- lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
|
419
|
+
- lib/connectors/mongodb/mongo_rules_parser.rb
|
412
420
|
- lib/connectors/registry.rb
|
413
421
|
- lib/connectors/sync_status.rb
|
422
|
+
- lib/connectors/tolerable_error_helper.rb
|
423
|
+
- lib/connectors_app/\
|
414
424
|
- lib/connectors_service.rb
|
415
425
|
- lib/connectors_utility.rb
|
416
426
|
- lib/core.rb
|
417
427
|
- lib/core/configuration.rb
|
428
|
+
- lib/core/connector_job.rb
|
418
429
|
- lib/core/connector_settings.rb
|
419
430
|
- lib/core/elastic_connector_actions.rb
|
431
|
+
- lib/core/filtering.rb
|
432
|
+
- lib/core/filtering/post_process_engine.rb
|
433
|
+
- lib/core/filtering/post_process_result.rb
|
434
|
+
- lib/core/filtering/simple_rule.rb
|
435
|
+
- lib/core/filtering/validation_job_runner.rb
|
436
|
+
- lib/core/filtering/validation_status.rb
|
420
437
|
- lib/core/heartbeat.rb
|
438
|
+
- lib/core/ingestion.rb
|
439
|
+
- lib/core/ingestion/es_sink.rb
|
440
|
+
- lib/core/jobs/consumer.rb
|
441
|
+
- lib/core/jobs/producer.rb
|
421
442
|
- lib/core/native_scheduler.rb
|
422
|
-
- lib/core/output_sink.rb
|
423
|
-
- lib/core/output_sink/base_sink.rb
|
424
|
-
- lib/core/output_sink/combined_sink.rb
|
425
|
-
- lib/core/output_sink/console_sink.rb
|
426
|
-
- lib/core/output_sink/es_sink.rb
|
427
443
|
- lib/core/scheduler.rb
|
428
444
|
- lib/core/single_scheduler.rb
|
429
445
|
- lib/core/sync_job_runner.rb
|
@@ -432,6 +448,7 @@ files:
|
|
432
448
|
- lib/stubs/connectors/stats.rb
|
433
449
|
- lib/stubs/service_type.rb
|
434
450
|
- lib/utility.rb
|
451
|
+
- lib/utility/bulk_queue.rb
|
435
452
|
- lib/utility/common.rb
|
436
453
|
- lib/utility/constants.rb
|
437
454
|
- lib/utility/cron.rb
|
@@ -439,10 +456,12 @@ files:
|
|
439
456
|
- lib/utility/elasticsearch/index/mappings.rb
|
440
457
|
- lib/utility/elasticsearch/index/text_analysis_settings.rb
|
441
458
|
- lib/utility/environment.rb
|
459
|
+
- lib/utility/error_monitor.rb
|
442
460
|
- lib/utility/errors.rb
|
443
461
|
- lib/utility/es_client.rb
|
444
462
|
- lib/utility/exception_tracking.rb
|
445
463
|
- lib/utility/extension_mapping_util.rb
|
464
|
+
- lib/utility/filtering.rb
|
446
465
|
- lib/utility/logger.rb
|
447
466
|
- lib/utility/middleware/basic_auth.rb
|
448
467
|
- lib/utility/middleware/bearer_auth.rb
|
@@ -451,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
451
470
|
licenses:
|
452
471
|
- Elastic-2.0
|
453
472
|
metadata: {}
|
454
|
-
post_install_message:
|
473
|
+
post_install_message:
|
455
474
|
rdoc_options: []
|
456
475
|
require_paths:
|
457
476
|
- lib
|
@@ -462,12 +481,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
462
481
|
version: '0'
|
463
482
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
464
483
|
requirements:
|
465
|
-
- - ">="
|
484
|
+
- - ">"
|
466
485
|
- !ruby/object:Gem::Version
|
467
|
-
version: '0'
|
486
|
+
version: 1.3.1
|
468
487
|
requirements: []
|
469
488
|
rubygems_version: 3.0.3.1
|
470
|
-
signing_key:
|
489
|
+
signing_key:
|
471
490
|
specification_version: 4
|
472
491
|
summary: Gem containing Elastic connectors service
|
473
492
|
test_files: []
|
@@ -1,33 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module OutputSink
|
11
|
-
class BaseSink
|
12
|
-
def ingest(_document)
|
13
|
-
raise 'not implemented'
|
14
|
-
end
|
15
|
-
|
16
|
-
def ingest_multiple(_documents)
|
17
|
-
raise 'not implemented'
|
18
|
-
end
|
19
|
-
|
20
|
-
def delete(_id)
|
21
|
-
raise 'not implemented'
|
22
|
-
end
|
23
|
-
|
24
|
-
def delete_multiple(_ids)
|
25
|
-
raise 'not implemented'
|
26
|
-
end
|
27
|
-
|
28
|
-
def flush(_size: nil)
|
29
|
-
raise 'not implemented'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink/base_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
-
def initialize(sinks = [])
|
15
|
-
@sinks = sinks
|
16
|
-
end
|
17
|
-
|
18
|
-
def ingest(document)
|
19
|
-
@sinks.each { |sink| sink.ingest(document) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def flush(size: nil)
|
23
|
-
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest_multiple(documents)
|
27
|
-
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def delete(id)
|
31
|
-
@sinks.each { |sink| sink.delete(id) }
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|