connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +4 -0
- data/lib/app/dispatcher.rb +42 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +114 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +72 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +25 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@

 require 'connectors/connector_status'
 require 'connectors/registry'
-require 'core/
+require 'core/filtering/post_process_engine'
+require 'core/ingestion'
+require 'core/filtering/validation_status'
 require 'utility'

 module Core
@@ -19,16 +21,21 @@ module Core
 end

 class SyncJobRunner
-
+JOB_REPORTING_INTERVAL = 10
+
+def initialize(connector_settings, job)
 @connector_settings = connector_settings
-@sink = Core::
+@sink = Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
 @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
 @sync_finished = false
+@sync_error = nil
 @status = {
 :indexed_document_count => 0,
 :deleted_document_count => 0,
+:indexed_document_volume => 0,
 :error => nil
 }
+@job = job
 end

 def execute
@@ -41,8 +48,17 @@ module Core
 def do_sync!
 Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")

-
-
+# connector service doesn't support multiple jobs running simultaneously
+raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
+
+Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
+
+# claim the job
+@job.make_running!
+
+job_description = @job.es_source
+job_id = @job.id
+job_description['_id'] = job_id

 unless job_id.present?
 Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +68,10 @@ module Core
 begin
 Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")

+Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
+validate_filtering(job_description.dig(:connector, :filtering))
+Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
+
 connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)

 connector_instance.do_health_check!
@@ -61,11 +81,21 @@ module Core

 Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")

+post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
+reporting_cycle_start = Time.now
+Utility::Logger.info('Yielding documents...')
 connector_instance.yield_documents do |document|
 document = add_ingest_metadata(document)
-
-
-
+post_process_result = post_processing_engine.process(document)
+if post_process_result.is_include?
+@sink.ingest(document)
+incoming_ids << document['id']
+end
+
+if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
+reporting_cycle_start = Time.now
+end
 end

 ids_to_delete = existing_ids - incoming_ids.uniq
@@ -74,7 +104,11 @@ module Core

 ids_to_delete.each do |id|
 @sink.delete(id)
-
+
+if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
+reporting_cycle_start = Time.now
+end
 end

 @sink.flush
@@ -83,22 +117,34 @@ module Core
 # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
 @sync_finished = true
 rescue StandardError => e
-@
+@sync_error = e.message
 Utility::ExceptionTracking.log_exception(e)
-ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
 ensure
+stats = @sink.ingestion_stats
+
+Utility::Logger.debug("Sync stats are: #{stats}")
+
+@status[:indexed_document_count] = stats[:indexed_document_count]
+@status[:deleted_document_count] = stats[:deleted_document_count]
+@status[:indexed_document_volume] = stats[:indexed_document_volume]
+
 Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
 Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")

 # Make sure to not override a previous error message
-if !@sync_finished && @
-@
+if !@sync_finished && @sync_error.nil?
+@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+end
+
+unless connector_instance.nil?
+metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
+metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
 end

-ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @
+ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)

-if @
-Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@
+if @sync_error
+Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
 else
 Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
 end
@@ -119,5 +165,15 @@ module Core

 raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
 end
+
+def validate_filtering(filtering)
+validation_result = @connector_class.validate_filtering(filtering)
+
+wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
+
+errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+raise errors_present_error if validation_result[:errors].present?
+end
 end
 end
data/lib/core.rb
CHANGED
@@ -7,10 +7,14 @@
 # frozen_string_literal: true

 require 'core/configuration'
+require 'core/connector_job'
 require 'core/connector_settings'
 require 'core/elastic_connector_actions'
+require 'core/filtering'
 require 'core/heartbeat'
 require 'core/scheduler'
 require 'core/single_scheduler'
 require 'core/native_scheduler'
 require 'core/sync_job_runner'
+require 'core/jobs/producer'
+require 'core/jobs/consumer'
data/lib/utility/bulk_queue.rb
ADDED
@@ -0,0 +1,85 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+require 'json'
+
+module Utility
+  class BulkQueue
+    class QueueOverflowError < StandardError; end
+
+    # 500 items or 5MB
+    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
+      @operation_count_threshold = operation_count_threshold.freeze
+      @size_threshold = size_threshold.freeze
+
+      @buffer = ''
+
+      @current_operation_count = 0
+
+      @current_buffer_size = 0
+      @current_data_size = 0
+    end
+
+    def pop_all
+      result = @buffer
+
+      reset
+
+      result
+    end
+
+    def add(operation, payload = nil)
+      raise QueueOverflowError unless will_fit?(operation, payload)
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_operation_count += 1
+      @current_buffer_size += operation_size
+      @current_buffer_size += payload_size
+      @current_data_size += payload_size
+
+      @buffer << operation
+      @buffer << "\n"
+
+      if payload
+        @buffer << payload
+        @buffer << "\n"
+      end
+    end
+
+    def will_fit?(operation, payload = nil)
+      return false if @current_operation_count + 1 > @operation_count_threshold
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_buffer_size + operation_size + payload_size < @size_threshold
+    end
+
+    def current_stats
+      {
+        :current_operation_count => @current_operation_count,
+        :current_buffer_size => @current_buffer_size
+      }
+    end
+
+    private
+
+    def get_size(str)
+      return 0 unless str
+      str.bytesize
+    end
+
+    def reset
+      @current_operation_count = 0
+      @current_buffer_size = 0
+      @current_data_size = 0
+
+      @buffer = ''
+    end
+  end
+end
data/lib/utility/constants.rb
CHANGED
data/lib/utility/error_monitor.rb
ADDED
@@ -0,0 +1,108 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'time'
+require 'utility/errors'
+require 'utility/exception_tracking'
+
+module Utility
+  class ErrorMonitor
+    class MonitoringError < StandardError
+      attr_accessor :tripped_by
+
+      def initialize(message = nil, tripped_by: nil)
+        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
+        @tripped_by = tripped_by
+      end
+    end
+
+    class MaxSuccessiveErrorsExceededError < MonitoringError; end
+    class MaxErrorsExceededError < MonitoringError; end
+    class MaxErrorsInWindowExceededError < MonitoringError; end
+
+    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
+
+    def initialize(
+      max_errors: 1000,
+      max_consecutive_errors: 10,
+      max_error_ratio: 0.15,
+      window_size: 100,
+      error_queue_size: 20
+    )
+      @max_errors = max_errors
+      @max_consecutive_errors = max_consecutive_errors
+      @max_error_ratio = max_error_ratio
+      @window_size = window_size
+      @total_error_count = 0
+      @success_count = 0
+      @consecutive_error_count = 0
+      @window_errors = Array.new(window_size) { false }
+      @window_index = 0
+      @last_error = nil
+      @error_queue_size = error_queue_size
+      @error_queue = []
+    end
+
+    def note_success
+      @consecutive_error_count = 0
+      @success_count += 1
+      increment_window_index
+    end
+
+    def note_error(error, id: Time.now.to_i)
+      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
+      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
+      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
+      @total_error_count += 1
+      @consecutive_error_count += 1
+      @window_errors[@window_index] = true
+      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
+      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
+      increment_window_index
+      @last_error = error
+
+      raise_if_necessary
+    end
+
+    def finalize
+      total_documents = @total_error_count + @success_count
+      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
+        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
+      end
+    end
+
+    private
+
+    def raise_if_necessary
+      error =
+        if @consecutive_error_count > @max_consecutive_errors
+          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
+        elsif @total_error_count > @max_errors
+          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
+        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
+          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
+        end

+      raise_with_last_cause(error) if error
+    end
+
+    def num_errors_in_window
+      @window_errors.count(&:itself).to_f
+    end
+
+    def increment_window_index
+      @window_index = (@window_index + 1) % @window_size
+    end
+
+    def raise_with_last_cause(error)
+      raise @last_error
+    rescue StandardError
+      raise error
+    end
+  end
+end
data/lib/utility/errors.rb
CHANGED
@@ -60,18 +60,6 @@ module Utility
 class JobDocumentLimitError < StandardError; end
 class JobClaimingError < StandardError; end

-class MonitoringError < StandardError
-attr_accessor :tripped_by
-
-def initialize(message = nil, tripped_by: nil)
-super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
-@tripped_by = tripped_by
-end
-end
-class MaxSuccessiveErrorsExceededError < MonitoringError; end
-class MaxErrorsExceededError < MonitoringError; end
-class MaxErrorsInWindowExceededError < MonitoringError; end
-
 class JobSyncNotPossibleYetError < StandardError
 attr_accessor :sync_will_be_possible_at

data/lib/utility/filtering.rb
ADDED
@@ -0,0 +1,22 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+module Utility
+  class Filtering
+    class << self
+      def extract_filter(filtering)
+        return {} unless filtering.present?
+
+        # assume for now, that first object in filtering array or a filter object itself is the only filtering object
+        filter = filtering.is_a?(Array) ? filtering.first : filtering
+
+        filter.present? ? filter : {}
+      end
+    end
+  end
+end
data/lib/utility/logger.rb
CHANGED
@@ -23,7 +23,7 @@ module Utility
 end

 def logger
-@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
+@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
 end

 SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,21 @@
 # you may not use this file except in compliance with the Elastic License.
 #

+# !!!!!!!!
+# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
+require 'utility/bulk_queue'
+require 'utility/common'
 require 'utility/constants'
 require 'utility/cron'
-require 'utility/
+require 'utility/elasticsearch/index/mappings'
+require 'utility/elasticsearch/index/text_analysis_settings'
+require 'utility/environment'
+require 'utility/error_monitor'
 require 'utility/errors'
+require 'utility/filtering'
 require 'utility/es_client'
-require 'utility/environment'
 require 'utility/exception_tracking'
 require 'utility/extension_mapping_util'
 require 'utility/logger'
-
-
+# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
+# !!!!!!!!
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: connectors_service
 version: !ruby/object:Gem::Version
-version: 8.6.0.4.pre.
+version: 8.6.0.4.pre.20221116T024501Z
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-11-
+date: 2022-11-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: activesupport
@@ -396,34 +396,49 @@ files:
 - lib/app/version.rb
 - lib/connectors.rb
 - lib/connectors/base/adapter.rb
+- lib/connectors/base/advanced_snippet_against_schema_validator.rb
+- lib/connectors/base/advanced_snippet_validator.rb
 - lib/connectors/base/connector.rb
 - lib/connectors/base/custom_client.rb
+- lib/connectors/base/simple_rules_parser.rb
 - lib/connectors/connector_status.rb
 - lib/connectors/crawler/scheduler.rb
 - lib/connectors/example/attachments/first_attachment.txt
 - lib/connectors/example/attachments/second_attachment.txt
 - lib/connectors/example/attachments/third_attachment.txt
 - lib/connectors/example/connector.rb
+- lib/connectors/example/example_advanced_snippet_validator.rb
 - lib/connectors/gitlab/adapter.rb
 - lib/connectors/gitlab/connector.rb
 - lib/connectors/gitlab/custom_client.rb
 - lib/connectors/gitlab/extractor.rb
+- lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
 - lib/connectors/mongodb/connector.rb
+- lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
+- lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
+- lib/connectors/mongodb/mongo_rules_parser.rb
 - lib/connectors/registry.rb
 - lib/connectors/sync_status.rb
+- lib/connectors/tolerable_error_helper.rb
 - lib/connectors_service.rb
 - lib/connectors_utility.rb
 - lib/core.rb
 - lib/core/configuration.rb
+- lib/core/connector_job.rb
 - lib/core/connector_settings.rb
 - lib/core/elastic_connector_actions.rb
+- lib/core/filtering.rb
+- lib/core/filtering/post_process_engine.rb
+- lib/core/filtering/post_process_result.rb
+- lib/core/filtering/simple_rule.rb
+- lib/core/filtering/validation_job_runner.rb
+- lib/core/filtering/validation_status.rb
 - lib/core/heartbeat.rb
+- lib/core/ingestion.rb
+- lib/core/ingestion/es_sink.rb
+- lib/core/jobs/consumer.rb
+- lib/core/jobs/producer.rb
 - lib/core/native_scheduler.rb
-- lib/core/output_sink.rb
-- lib/core/output_sink/base_sink.rb
-- lib/core/output_sink/combined_sink.rb
-- lib/core/output_sink/console_sink.rb
-- lib/core/output_sink/es_sink.rb
 - lib/core/scheduler.rb
 - lib/core/single_scheduler.rb
 - lib/core/sync_job_runner.rb
@@ -432,6 +447,7 @@ files:
 - lib/stubs/connectors/stats.rb
 - lib/stubs/service_type.rb
 - lib/utility.rb
+- lib/utility/bulk_queue.rb
 - lib/utility/common.rb
 - lib/utility/constants.rb
 - lib/utility/cron.rb
@@ -439,10 +455,12 @@ files:
 - lib/utility/elasticsearch/index/mappings.rb
 - lib/utility/elasticsearch/index/text_analysis_settings.rb
 - lib/utility/environment.rb
+- lib/utility/error_monitor.rb
 - lib/utility/errors.rb
 - lib/utility/es_client.rb
 - lib/utility/exception_tracking.rb
 - lib/utility/extension_mapping_util.rb
+- lib/utility/filtering.rb
 - lib/utility/logger.rb
 - lib/utility/middleware/basic_auth.rb
 - lib/utility/middleware/bearer_auth.rb
data/lib/core/output_sink/base_sink.rb
REMOVED
@@ -1,33 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-module Core
-  module OutputSink
-    class BaseSink
-      def ingest(_document)
-        raise 'not implemented'
-      end
-
-      def ingest_multiple(_documents)
-        raise 'not implemented'
-      end
-
-      def delete(_id)
-        raise 'not implemented'
-      end
-
-      def delete_multiple(_ids)
-        raise 'not implemented'
-      end
-
-      def flush(_size: nil)
-        raise 'not implemented'
-      end
-    end
-  end
-end
data/lib/core/output_sink/combined_sink.rb
REMOVED
@@ -1,38 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-require 'core/output_sink/base_sink'
-require 'utility/logger'
-
-module Core::OutputSink
-  class CombinedSink < Core::OutputSink::BaseSink
-    def initialize(sinks = [])
-      @sinks = sinks
-    end
-
-    def ingest(document)
-      @sinks.each { |sink| sink.ingest(document) }
-    end
-
-    def flush(size: nil)
-      @sinks.each { |sink| sink.flush(size: size) }
-    end
-
-    def ingest_multiple(documents)
-      @sinks.each { |sink| sink.ingest_multiple(documents) }
-    end
-
-    def delete(id)
-      @sinks.each { |sink| sink.delete(id) }
-    end
-
-    def delete_multiple(ids)
-      @sinks.each { |sink| sink.delete_multiple(ids) }
-    end
-  end
-end
data/lib/core/output_sink/console_sink.rb
REMOVED
@@ -1,51 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-#
-
-# frozen_string_literal: true
-
-require 'core/output_sink'
-require 'utility/logger'
-
-module Core::OutputSink
-  class ConsoleSink < Core::OutputSink::BaseSink
-    def ingest(document)
-      print_header 'Got a single document:'
-      puts document
-    end
-
-    def flush(size: nil)
-      print_header 'Flushing'
-      puts "Flush size: #{size}"
-    end
-
-    def ingest_multiple(documents)
-      print_header 'Got multiple documents:'
-      puts documents
-    end
-
-    def delete(id)
-      print_header "Deleting single id: #{id}"
-      puts id
-    end
-
-    def delete_multiple(ids)
-      print_header "Deleting several ids: #{ids}"
-      puts ids
-    end
-
-    private
-
-    def print_delim
-      puts '----------------------------------------------------'
-    end
-
-    def print_header(header)
-      print_delim
-      puts header
-      print_delim
-    end
-  end
-end