connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -8
- data/lib/app/app.rb +4 -0
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +44 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +132 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +80 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +87 -0
- data/lib/utility/constants.rb +7 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +31 -12
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
-
require 'core/
|
11
|
+
require 'core/filtering/post_process_engine'
|
12
|
+
require 'core/ingestion'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility'
|
13
15
|
|
14
16
|
module Core
|
@@ -19,16 +21,29 @@ module Core
|
|
19
21
|
end
|
20
22
|
|
21
23
|
class SyncJobRunner
|
22
|
-
|
24
|
+
JOB_REPORTING_INTERVAL = 10
|
25
|
+
|
26
|
+
def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
|
23
27
|
@connector_settings = connector_settings
|
24
|
-
@sink = Core::
|
28
|
+
@sink = Core::Ingestion::EsSink.new(
|
29
|
+
connector_settings.index_name,
|
30
|
+
@connector_settings.request_pipeline,
|
31
|
+
Utility::BulkQueue.new(
|
32
|
+
max_ingestion_queue_size,
|
33
|
+
max_ingestion_queue_bytes
|
34
|
+
),
|
35
|
+
max_ingestion_queue_bytes
|
36
|
+
)
|
25
37
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
26
38
|
@sync_finished = false
|
39
|
+
@sync_error = nil
|
27
40
|
@status = {
|
28
41
|
:indexed_document_count => 0,
|
29
42
|
:deleted_document_count => 0,
|
43
|
+
:indexed_document_volume => 0,
|
30
44
|
:error => nil
|
31
45
|
}
|
46
|
+
@job = job
|
32
47
|
end
|
33
48
|
|
34
49
|
def execute
|
@@ -41,8 +56,17 @@ module Core
|
|
41
56
|
def do_sync!
|
42
57
|
Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
|
43
58
|
|
44
|
-
|
45
|
-
|
59
|
+
# connector service doesn't support multiple jobs running simultaneously
|
60
|
+
raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
|
61
|
+
|
62
|
+
Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
|
63
|
+
|
64
|
+
# claim the job
|
65
|
+
@job.make_running!
|
66
|
+
|
67
|
+
job_description = @job.es_source
|
68
|
+
job_id = @job.id
|
69
|
+
job_description['_id'] = job_id
|
46
70
|
|
47
71
|
unless job_id.present?
|
48
72
|
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
@@ -52,6 +76,10 @@ module Core
|
|
52
76
|
begin
|
53
77
|
Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
|
54
78
|
|
79
|
+
Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
|
80
|
+
validate_filtering(job_description.dig(:connector, :filtering))
|
81
|
+
Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
|
82
|
+
|
55
83
|
connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
|
56
84
|
|
57
85
|
connector_instance.do_health_check!
|
@@ -61,11 +89,21 @@ module Core
|
|
61
89
|
|
62
90
|
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
63
91
|
|
92
|
+
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
93
|
+
reporting_cycle_start = Time.now
|
94
|
+
Utility::Logger.info('Yielding documents...')
|
64
95
|
connector_instance.yield_documents do |document|
|
65
96
|
document = add_ingest_metadata(document)
|
66
|
-
|
67
|
-
|
68
|
-
|
97
|
+
post_process_result = post_processing_engine.process(document)
|
98
|
+
if post_process_result.is_include?
|
99
|
+
@sink.ingest(document)
|
100
|
+
incoming_ids << document['id']
|
101
|
+
end
|
102
|
+
|
103
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
104
|
+
ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
105
|
+
reporting_cycle_start = Time.now
|
106
|
+
end
|
69
107
|
end
|
70
108
|
|
71
109
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
@@ -74,7 +112,11 @@ module Core
|
|
74
112
|
|
75
113
|
ids_to_delete.each do |id|
|
76
114
|
@sink.delete(id)
|
77
|
-
|
115
|
+
|
116
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
117
|
+
ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
118
|
+
reporting_cycle_start = Time.now
|
119
|
+
end
|
78
120
|
end
|
79
121
|
|
80
122
|
@sink.flush
|
@@ -83,22 +125,34 @@ module Core
|
|
83
125
|
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
84
126
|
@sync_finished = true
|
85
127
|
rescue StandardError => e
|
86
|
-
@
|
128
|
+
@sync_error = e.message
|
87
129
|
Utility::ExceptionTracking.log_exception(e)
|
88
|
-
ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
|
89
130
|
ensure
|
131
|
+
stats = @sink.ingestion_stats
|
132
|
+
|
133
|
+
Utility::Logger.debug("Sync stats are: #{stats}")
|
134
|
+
|
135
|
+
@status[:indexed_document_count] = stats[:indexed_document_count]
|
136
|
+
@status[:deleted_document_count] = stats[:deleted_document_count]
|
137
|
+
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
138
|
+
|
90
139
|
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
91
140
|
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
92
141
|
|
93
142
|
# Make sure to not override a previous error message
|
94
|
-
if !@sync_finished && @
|
95
|
-
@
|
143
|
+
if !@sync_finished && @sync_error.nil?
|
144
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
145
|
+
end
|
146
|
+
|
147
|
+
unless connector_instance.nil?
|
148
|
+
metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
|
149
|
+
metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
|
96
150
|
end
|
97
151
|
|
98
|
-
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @
|
152
|
+
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
|
99
153
|
|
100
|
-
if @
|
101
|
-
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@
|
154
|
+
if @sync_error
|
155
|
+
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
|
102
156
|
else
|
103
157
|
Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
|
104
158
|
end
|
@@ -119,5 +173,15 @@ module Core
|
|
119
173
|
|
120
174
|
raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
121
175
|
end
|
176
|
+
|
177
|
+
# Asks the connector class to validate the given filtering and raises when
# the result is unusable for a sync.
#
# The validation result is read through the :state and :errors keys.
#
# @param filtering the filtering taken from the job description
# @raise [Utility::InvalidFilterConfigError] when the reported state is not
#   Core::Filtering::ValidationStatus::VALID, or when the state is valid but
#   errors are still present in the result
# @return [nil] when the filtering passes both checks
def validate_filtering(filtering)
  validation_result = @connector_class.validate_filtering(filtering)
  connector_id = @connector_settings.id

  # First gate: the validation state itself must be VALID.
  state = validation_result[:state]
  if state != Core::Filtering::ValidationStatus::VALID
    raise Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{state}) for connector #{connector_id}. Please check active filtering in connectors index.")
  end

  # Second gate: a VALID state must not be accompanied by errors.
  errors = validation_result[:errors]
  if errors.present?
    raise Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{errors}) for connector #{connector_id}. Please check active filtering in connectors index.")
  end
end
|
122
186
|
end
|
123
187
|
end
|
data/lib/core.rb
CHANGED
@@ -7,10 +7,14 @@
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
9
|
require 'core/configuration'
|
10
|
+
require 'core/connector_job'
|
10
11
|
require 'core/connector_settings'
|
11
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering'
|
12
14
|
require 'core/heartbeat'
|
13
15
|
require 'core/scheduler'
|
14
16
|
require 'core/single_scheduler'
|
15
17
|
require 'core/native_scheduler'
|
16
18
|
require 'core/sync_job_runner'
|
19
|
+
require 'core/jobs/producer'
|
20
|
+
require 'core/jobs/consumer'
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
require 'utility/constants'
|
10
|
+
|
11
|
+
module Utility
  # Accumulates bulk operations as newline-terminated lines of text until the
  # caller drains them with #pop_all.
  #
  # The queue is bounded in two ways: by the number of operations and by the
  # combined byte size of the operation and payload strings. Note that the
  # newline separators appended to the buffer are NOT included in the byte
  # accounting, so the real buffer is slightly larger than the reported size.
  class BulkQueue
    # Raised by #add when the operation does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # Defaults come from Utility::Constants (500 operations / 5MB).
    #
    # @param operation_count_threshold [Integer] maximum number of queued operations
    # @param size_threshold [Integer] byte limit; the accounted size must stay strictly below it
    def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
      @operation_count_threshold = operation_count_threshold.freeze
      @size_threshold = size_threshold.freeze

      reset
    end

    # Returns everything buffered so far and empties the queue.
    #
    # @return [String] the buffered lines (an empty string when nothing was added)
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, when given, its payload line.
    #
    # @param operation [String] operation line; must respond to #bytesize
    # @param payload [String, nil] optional payload line
    # @raise [QueueOverflowError] when #will_fit? is false for these arguments
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      operation_size = get_size(operation)
      payload_size = get_size(payload)

      @current_operation_count += 1
      @current_buffer_size += operation_size
      @current_buffer_size += payload_size
      # Payload bytes are tracked separately from the operation bytes.
      @current_data_size += payload_size

      @buffer << operation
      @buffer << "\n"

      if payload
        @buffer << payload
        @buffer << "\n"
      end
    end

    # Tells whether one more operation (plus optional payload) can be queued.
    #
    # @return [Boolean] false when the operation count would exceed its
    #   threshold or the accounted size would reach the size threshold
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      operation_size = get_size(operation)
      payload_size = get_size(payload)

      @current_buffer_size + operation_size + payload_size < @size_threshold
    end

    # @return [Hash] the current operation count and accounted buffer size
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of +str+, treating nil as empty.
    def get_size(str)
      return 0 unless str
      str.bytesize
    end

    # Zeroes all counters and starts a fresh buffer.
    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # Unary plus guarantees a mutable string even if string literals are frozen.
      @buffer = +''
    end
  end
end
|
data/lib/utility/constants.rb
CHANGED
@@ -16,5 +16,12 @@ module Utility
|
|
16
16
|
JOB_INDEX = '.elastic-connectors-sync-jobs'
|
17
17
|
CONTENT_INDEX_PREFIX = 'search-'
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
|
+
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
|
+
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
19
26
|
end
|
20
27
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'utility/errors'
|
11
|
+
require 'utility/exception_tracking'
|
12
|
+
|
13
|
+
module Utility
  # Tracks successes and errors reported by the caller and raises once one of
  # the configured limits is exceeded: too many consecutive errors, too many
  # errors in total, or too high an error ratio within a rolling window of the
  # most recent outcomes.
  class ErrorMonitor
    # Base class of the errors raised by the monitor. Carries the error that
    # tripped the limit in #tripped_by and appends it to the message.
    class MonitoringError < StandardError
      attr_accessor :tripped_by

      def initialize(message = nil, tripped_by: nil)
        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors [Integer] total errors tolerated before raising
    # @param max_consecutive_errors [Integer] errors in a row tolerated before raising
    # @param max_error_ratio [Float] tolerated share of errors, both within the window and in #finalize
    # @param window_size [Integer] number of most recent outcomes kept for the ratio check; 0 disables it
    # @param error_queue_size [Integer] number of most recent error records kept in #error_queue
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records a successful outcome and ends the current run of consecutive errors.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      # Overwrite the slot so an old error falls out of the rolling window.
      record_in_window(false)
    end

    # Records a failed outcome, logs it, and raises if a limit is now exceeded.
    #
    # @param error [Exception] the error that occurred
    # @param id identifier stored with the error record (defaults to the current epoch seconds)
    # @raise [MonitoringError] one of the subclasses above when a limit is exceeded
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")

      @total_error_count += 1
      @consecutive_error_count += 1

      # Keep only the newest @error_queue_size records.
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size

      record_in_window(true)
      @last_error = error

      raise_if_necessary
    end

    # Final check over everything seen so far.
    #
    # @raise [MaxErrorsInWindowExceededError] when the overall error ratio exceeds max_error_ratio
    def finalize
      total_documents = @total_error_count + @success_count
      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
      end
    end

    private

    # Checks the limits in order: consecutive errors, total errors, window ratio.
    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # Number of error slots currently set in the window, as a Float so the
    # ratio computation does not use integer division.
    def num_errors_in_window
      @window_errors.count(&:itself).to_f
    end

    # Stores the outcome in the current window slot and advances the ring
    # index. Does nothing when the window is disabled (size 0), which also
    # avoids a modulo-by-zero.
    def record_in_window(is_error)
      return unless @window_size.positive?

      @window_errors[@window_index] = is_error
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ with the last noted error attached as its cause.
    def raise_with_last_cause(error)
      raise error, cause: @last_error
    end
  end
end
|
data/lib/utility/errors.rb
CHANGED
@@ -60,18 +60,6 @@ module Utility
|
|
60
60
|
class JobDocumentLimitError < StandardError; end
|
61
61
|
class JobClaimingError < StandardError; end
|
62
62
|
|
63
|
-
class MonitoringError < StandardError
|
64
|
-
attr_accessor :tripped_by
|
65
|
-
|
66
|
-
def initialize(message = nil, tripped_by: nil)
|
67
|
-
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
68
|
-
@tripped_by = tripped_by
|
69
|
-
end
|
70
|
-
end
|
71
|
-
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
72
|
-
class MaxErrorsExceededError < MonitoringError; end
|
73
|
-
class MaxErrorsInWindowExceededError < MonitoringError; end
|
74
|
-
|
75
63
|
class JobSyncNotPossibleYetError < StandardError
|
76
64
|
attr_accessor :sync_will_be_possible_at
|
77
65
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  # Helpers for reading the filtering configuration.
  class Filtering
    # Picks the single filter object out of a filtering value.
    #
    # For now the first element of a filtering array, or a lone filter object
    # passed directly, is assumed to be the only filtering object.
    #
    # @param filtering [Array, Object, nil] filtering array or a single filter object
    # @return the filter object, or an empty Hash when nothing usable is present
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      candidate = filtering
      candidate = candidate.first if candidate.is_a?(Array)
      return {} unless candidate.present?

      candidate
    end
  end
end
|
data/lib/utility/logger.rb
CHANGED
@@ -23,7 +23,7 @@ module Utility
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
26
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
27
|
end
|
28
28
|
|
29
29
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,21 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
# !!!!!!!!
|
8
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
9
|
+
require 'utility/bulk_queue'
|
10
|
+
require 'utility/common'
|
7
11
|
require 'utility/constants'
|
8
12
|
require 'utility/cron'
|
9
|
-
require 'utility/
|
13
|
+
require 'utility/elasticsearch/index/mappings'
|
14
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
15
|
+
require 'utility/environment'
|
16
|
+
require 'utility/error_monitor'
|
10
17
|
require 'utility/errors'
|
18
|
+
require 'utility/filtering'
|
11
19
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
20
|
require 'utility/exception_tracking'
|
14
21
|
require 'utility/extension_mapping_util'
|
15
22
|
require 'utility/logger'
|
16
|
-
|
17
|
-
|
23
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
24
|
+
# !!!!!!!!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.
|
4
|
+
version: 8.7.0.0.pre.20221117T010623Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -396,34 +396,50 @@ files:
|
|
396
396
|
- lib/app/version.rb
|
397
397
|
- lib/connectors.rb
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
|
+
- lib/connectors/base/advanced_snippet_against_schema_validator.rb
|
400
|
+
- lib/connectors/base/advanced_snippet_validator.rb
|
399
401
|
- lib/connectors/base/connector.rb
|
400
402
|
- lib/connectors/base/custom_client.rb
|
403
|
+
- lib/connectors/base/simple_rules_parser.rb
|
401
404
|
- lib/connectors/connector_status.rb
|
402
405
|
- lib/connectors/crawler/scheduler.rb
|
403
406
|
- lib/connectors/example/attachments/first_attachment.txt
|
404
407
|
- lib/connectors/example/attachments/second_attachment.txt
|
405
408
|
- lib/connectors/example/attachments/third_attachment.txt
|
406
409
|
- lib/connectors/example/connector.rb
|
410
|
+
- lib/connectors/example/example_advanced_snippet_validator.rb
|
407
411
|
- lib/connectors/gitlab/adapter.rb
|
408
412
|
- lib/connectors/gitlab/connector.rb
|
409
413
|
- lib/connectors/gitlab/custom_client.rb
|
410
414
|
- lib/connectors/gitlab/extractor.rb
|
415
|
+
- lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
|
411
416
|
- lib/connectors/mongodb/connector.rb
|
417
|
+
- lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
|
418
|
+
- lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
|
419
|
+
- lib/connectors/mongodb/mongo_rules_parser.rb
|
412
420
|
- lib/connectors/registry.rb
|
413
421
|
- lib/connectors/sync_status.rb
|
422
|
+
- lib/connectors/tolerable_error_helper.rb
|
423
|
+
- lib/connectors_app/\
|
414
424
|
- lib/connectors_service.rb
|
415
425
|
- lib/connectors_utility.rb
|
416
426
|
- lib/core.rb
|
417
427
|
- lib/core/configuration.rb
|
428
|
+
- lib/core/connector_job.rb
|
418
429
|
- lib/core/connector_settings.rb
|
419
430
|
- lib/core/elastic_connector_actions.rb
|
431
|
+
- lib/core/filtering.rb
|
432
|
+
- lib/core/filtering/post_process_engine.rb
|
433
|
+
- lib/core/filtering/post_process_result.rb
|
434
|
+
- lib/core/filtering/simple_rule.rb
|
435
|
+
- lib/core/filtering/validation_job_runner.rb
|
436
|
+
- lib/core/filtering/validation_status.rb
|
420
437
|
- lib/core/heartbeat.rb
|
438
|
+
- lib/core/ingestion.rb
|
439
|
+
- lib/core/ingestion/es_sink.rb
|
440
|
+
- lib/core/jobs/consumer.rb
|
441
|
+
- lib/core/jobs/producer.rb
|
421
442
|
- lib/core/native_scheduler.rb
|
422
|
-
- lib/core/output_sink.rb
|
423
|
-
- lib/core/output_sink/base_sink.rb
|
424
|
-
- lib/core/output_sink/combined_sink.rb
|
425
|
-
- lib/core/output_sink/console_sink.rb
|
426
|
-
- lib/core/output_sink/es_sink.rb
|
427
443
|
- lib/core/scheduler.rb
|
428
444
|
- lib/core/single_scheduler.rb
|
429
445
|
- lib/core/sync_job_runner.rb
|
@@ -432,6 +448,7 @@ files:
|
|
432
448
|
- lib/stubs/connectors/stats.rb
|
433
449
|
- lib/stubs/service_type.rb
|
434
450
|
- lib/utility.rb
|
451
|
+
- lib/utility/bulk_queue.rb
|
435
452
|
- lib/utility/common.rb
|
436
453
|
- lib/utility/constants.rb
|
437
454
|
- lib/utility/cron.rb
|
@@ -439,10 +456,12 @@ files:
|
|
439
456
|
- lib/utility/elasticsearch/index/mappings.rb
|
440
457
|
- lib/utility/elasticsearch/index/text_analysis_settings.rb
|
441
458
|
- lib/utility/environment.rb
|
459
|
+
- lib/utility/error_monitor.rb
|
442
460
|
- lib/utility/errors.rb
|
443
461
|
- lib/utility/es_client.rb
|
444
462
|
- lib/utility/exception_tracking.rb
|
445
463
|
- lib/utility/extension_mapping_util.rb
|
464
|
+
- lib/utility/filtering.rb
|
446
465
|
- lib/utility/logger.rb
|
447
466
|
- lib/utility/middleware/basic_auth.rb
|
448
467
|
- lib/utility/middleware/bearer_auth.rb
|
@@ -451,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
451
470
|
licenses:
|
452
471
|
- Elastic-2.0
|
453
472
|
metadata: {}
|
454
|
-
post_install_message:
|
473
|
+
post_install_message:
|
455
474
|
rdoc_options: []
|
456
475
|
require_paths:
|
457
476
|
- lib
|
@@ -462,12 +481,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
462
481
|
version: '0'
|
463
482
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
464
483
|
requirements:
|
465
|
-
- - "
|
484
|
+
- - ">"
|
466
485
|
- !ruby/object:Gem::Version
|
467
|
-
version:
|
486
|
+
version: 1.3.1
|
468
487
|
requirements: []
|
469
488
|
rubygems_version: 3.0.3.1
|
470
|
-
signing_key:
|
489
|
+
signing_key:
|
471
490
|
specification_version: 4
|
472
491
|
summary: Gem containing Elastic connectors service
|
473
492
|
test_files: []
|
@@ -1,33 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module OutputSink
|
11
|
-
class BaseSink
|
12
|
-
def ingest(_document)
|
13
|
-
raise 'not implemented'
|
14
|
-
end
|
15
|
-
|
16
|
-
def ingest_multiple(_documents)
|
17
|
-
raise 'not implemented'
|
18
|
-
end
|
19
|
-
|
20
|
-
def delete(_id)
|
21
|
-
raise 'not implemented'
|
22
|
-
end
|
23
|
-
|
24
|
-
def delete_multiple(_ids)
|
25
|
-
raise 'not implemented'
|
26
|
-
end
|
27
|
-
|
28
|
-
def flush(_size: nil)
|
29
|
-
raise 'not implemented'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink/base_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
-
def initialize(sinks = [])
|
15
|
-
@sinks = sinks
|
16
|
-
end
|
17
|
-
|
18
|
-
def ingest(document)
|
19
|
-
@sinks.each { |sink| sink.ingest(document) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def flush(size: nil)
|
23
|
-
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest_multiple(documents)
|
27
|
-
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def delete(id)
|
31
|
-
@sinks.each { |sink| sink.delete(id) }
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|