connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -10
- data/lib/app/config.rb +2 -0
- data/lib/app/dispatcher.rb +17 -1
- data/lib/app/preflight_check.rb +15 -0
- data/lib/connectors/base/connector.rb +37 -4
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
- data/lib/connectors/example/connector.rb +43 -4
- data/lib/connectors/gitlab/connector.rb +16 -2
- data/lib/connectors/mongodb/connector.rb +173 -50
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/registry.rb +2 -2
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/configuration.rb +4 -2
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +29 -18
- data/lib/core/elastic_connector_actions.rb +331 -32
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/native_scheduler.rb +3 -0
- data/lib/core/scheduler.rb +43 -10
- data/lib/core/single_scheduler.rb +3 -0
- data/lib/core/sync_job_runner.rb +78 -18
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -3
- metadata +27 -18
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/filtering/validation_job_runner.rb ADDED
@@ -0,0 +1,53 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'connectors/connector_status'
+require 'connectors/registry'
+
+module Core
+  module Filtering
+    DEFAULT_DOMAIN = 'DEFAULT'
+
+    class ValidationJobRunner
+      def initialize(connector_settings)
+        @connector_settings = connector_settings
+        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
+        @validation_finished = false
+        @status = { :error => nil }
+      end
+
+      def execute
+        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")
+
+        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)
+
+        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
+        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })
+
+        @validation_finished = true
+      rescue StandardError => e
+        Utility::ExceptionTracking.log_exception(e)
+        validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
+                                     :errors => [
+                                       { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
+                                     ] }
+        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
+      ensure
+        if !@validation_finished && !@status[:error].present?
+          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
+        end
+
+        if @status[:error]
+          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
+        else
+          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
+        end
+      end
+    end
+  end
+end
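The runner pulls the connector class from the registry, validates the connector's current filtering, and writes the per-domain result back through ElasticConnectorActions. A minimal sketch of how it might be driven, assuming connector settings are fetched by id as elsewhere in this gem (the real wiring lives in the scheduler and dispatcher changes in this release):

    # Hypothetical wiring: run a filtering validation job for one connector.
    connector_settings = Core::ConnectorSettings.fetch_by_id('my-connector-id')
    Core::Filtering::ValidationJobRunner.new(connector_settings).execute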
data/lib/core/filtering/validation_status.rb RENAMED
@@ -4,10 +4,14 @@
 # you may not use this file except in compliance with the Elastic License.
 #
 
-
-
-
-
-
+# frozen_string_literal: true
+
+module Core
+  module Filtering
+    class ValidationStatus
+      INVALID = 'invalid'
+      VALID = 'valid'
+      EDITED = 'edited'
+    end
   end
 end
data/lib/core/filtering.rb ADDED
@@ -0,0 +1,17 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'core/filtering/post_process_engine'
+require 'core/filtering/post_process_result'
+require 'core/filtering/simple_rule'
+require 'core/filtering/validation_job_runner'
+require 'core/filtering/validation_status'
+
+module Core::Filtering
+  DEFAULT_DOMAIN = 'DEFAULT'
+end
data/lib/core/ingestion/es_sink.rb ADDED
@@ -0,0 +1,59 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'app/config'
+require 'utility/bulk_queue'
+require 'utility/es_client'
+require 'utility/logger'
+require 'elasticsearch/api'
+
+module Core
+  module Ingestion
+    class EsSink
+      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
+        @client = Utility::EsClient.new(App::Config[:elasticsearch])
+        @index_name = index_name
+        @request_pipeline = request_pipeline
+        @operation_queue = bulk_queue
+      end
+
+      def ingest(id, serialized_document)
+        index_op = serialize({ 'index' => { '_index' => index_name, '_id' => id } })
+
+        flush unless @operation_queue.will_fit?(index_op, serialized_document)
+
+        @operation_queue.add(
+          index_op,
+          serialized_document
+        )
+      end
+
+      def delete(doc_id)
+        delete_op = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })
+        flush unless @operation_queue.will_fit?(delete_op)
+
+        @operation_queue.add(delete_op)
+      end
+
+      def flush
+        data = @operation_queue.pop_all
+        return if data.empty?
+
+        @client.bulk(:body => data, :pipeline => @request_pipeline)
+      end
+
+      def serialize(obj)
+        Elasticsearch::API.serializer.dump(obj)
+      end
+
+      private
+
+      attr_accessor :index_name
+    end
+  end
+end
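EsSink never writes documents one by one: ingest and delete only enqueue serialized bulk operations, and a single _bulk request is issued when the queue reports that the next operation will not fit, or when flush is called explicitly. A minimal sketch of the intended call pattern, with an assumed index name and pipeline:

    # Hypothetical usage; the Ingester (next file) normally drives this sink.
    sink = Core::Ingestion::EsSink.new('search-example-index', 'ent-search-generic-ingestion')
    sink.ingest('doc-1', sink.serialize({ 'id' => 'doc-1', 'title' => 'Hello world' }))
    sink.delete('stale-doc-id')
    sink.flush # sends one bulk request containing both queued operations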
data/lib/core/ingestion/ingester.rb ADDED
@@ -0,0 +1,90 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'utility/logger'
+
+module Core
+  module Ingestion
+    class Ingester
+      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
+        @sink_strategy = sink_strategy
+        @max_allowed_document_size = max_allowed_document_size
+
+        @ingested_count = 0
+        @ingested_volume = 0
+        @deleted_count = 0
+      end
+
+      def ingest(document)
+        unless document&.any?
+          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
+          return
+        end
+
+        serialized_document = @sink_strategy.serialize(document)
+        document_size = serialized_document.bytesize
+
+        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
+          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
+          return
+        end
+
+        @sink_strategy.ingest(document['id'], serialized_document)
+
+        @ingested_count += 1
+        @ingested_volume += document_size
+      end
+
+      def ingest_multiple(documents)
+        documents.each { |doc| ingest(doc) }
+      end
+
+      def delete(id)
+        return if id.nil?
+
+        @sink_strategy.delete(id)
+
+        @deleted_count += 1
+      end
+
+      def delete_multiple(ids)
+        ids.each { |id| delete(id) }
+      end
+
+      def flush
+        @sink_strategy.flush
+      end
+
+      def ingestion_stats
+        {
+          :indexed_document_count => @ingested_count,
+          :indexed_document_volume => @ingested_volume,
+          :deleted_document_count => @deleted_count
+        }
+      end
+
+      private
+
+      def do_ingest(_id, _serialized_document)
+        raise NotImplementedError
+      end
+
+      def do_delete(_id)
+        raise NotImplementedError
+      end
+
+      def do_flush
+        raise NotImplementedError
+      end
+
+      def do_serialize(_document)
+        raise NotImplementedError
+      end
+    end
+  end
+end
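The Ingester wraps a sink ("sink strategy"), drops empty documents, skips documents larger than max_allowed_document_size (5 MB by default), and keeps the counters that SyncJobRunner reports as ingestion stats further down in this diff. A sketch of the pairing, with placeholder index name and pipeline:

    # Hypothetical wiring, mirroring what SyncJobRunner does in this release.
    sink = Core::Ingestion::EsSink.new('search-example-index', 'ent-search-generic-ingestion')
    ingester = Core::Ingestion::Ingester.new(sink)

    ingester.ingest({ 'id' => '1', 'title' => 'Hello world' })
    ingester.delete('stale-doc-id')
    ingester.flush

    ingester.ingestion_stats
    # => { :indexed_document_count => 1, :indexed_document_volume => <serialized bytes>, :deleted_document_count => 1 }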
data/lib/core/{output_sink.rb → ingestion.rb} RENAMED
@@ -6,8 +6,5 @@
 
 # frozen_string_literal: true
 
-require 'core/
-require 'core/
-require 'core/output_sink/combined_sink'
-
-module Core::OutputSink; end
+require 'core/ingestion/ingester'
+require 'core/ingestion/es_sink'
data/lib/core/native_scheduler.rb CHANGED
@@ -16,6 +16,9 @@ module Core
   class NativeScheduler < Core::Scheduler
     def connector_settings
       Core::ConnectorSettings.fetch_native_connectors || []
+    rescue *Utility::AUTHORIZATION_ERRORS => e
+      # should be handled by the general scheduler
+      raise e
     rescue StandardError => e
       Utility::ExceptionTracking.log_exception(e, 'Could not retrieve native connectors due to unexpected error.')
       []
data/lib/core/scheduler.rb
CHANGED
@@ -9,6 +9,8 @@
 require 'time'
 require 'fugit'
 require 'core/connector_settings'
+require 'core/elastic_connector_actions'
+require 'core/filtering/validation_status'
 require 'utility/cron'
 require 'utility/logger'
 require 'utility/exception_tracking'
@@ -37,13 +39,18 @@ module Core
           if configuration_triggered?(cs)
             yield cs, :configuration
           end
+          if filtering_validation_triggered?(cs)
+            yield cs, :filter_validation
+          end
         end
-
-
-      end
+      rescue *Utility::AUTHORIZATION_ERRORS => e
+        Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
       rescue StandardError => e
         Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
       ensure
+        if @is_shutting_down
+          break
+        end
         if @poll_interval > 0 && !@is_shutting_down
           Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
           sleep(@poll_interval)
@@ -59,8 +66,6 @@ module Core
     private
 
     def sync_triggered?(connector_settings)
-      return false unless connector_registered?(connector_settings.service_type)
-
       unless connector_settings.valid_index_name?
         Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
         return false
@@ -126,8 +131,6 @@ module Core
     end
 
     def heartbeat_triggered?(connector_settings)
-      return false unless connector_registered?(connector_settings.service_type)
-
       last_seen = connector_settings[:last_seen]
       return true if last_seen.nil? || last_seen.empty?
       last_seen = begin
@@ -141,11 +144,41 @@ module Core
     end
 
     def configuration_triggered?(connector_settings)
-
-
+      connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
+    end
+
+    def filtering_validation_triggered?(connector_settings)
+      filtering = connector_settings.filtering
+
+      unless filtering.present?
+        Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
+
+        return false
       end
 
-
+      draft_filters = filtering[:draft]
+
+      unless draft_filters.present?
+        Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
+
+        return false
+      end
+
+      validation = draft_filters[:validation]
+
+      unless validation.present?
+        Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
+
+        return false
+      end
+
+      unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
+        Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
+
+        return false
+      end
+
+      true
     end
 
     def connector_registered?(service_type)
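For reference, filtering_validation_triggered? only fires when the connector's filtering carries a draft whose validation state is 'edited'. A sketch of the smallest filtering fragment that satisfies the checks above; the surrounding connector document shape is assumed, and only the keys tested here are shown:

    filtering = {
      :draft => {
        :validation => {
          :state => Core::Filtering::ValidationStatus::EDITED # 'edited' triggers a :filter_validation task
        }
      }
    }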
data/lib/core/single_scheduler.rb CHANGED
@@ -21,6 +21,9 @@ module Core
     def connector_settings
       connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
       [connector_settings]
+    rescue *Utility::AUTHORIZATION_ERRORS => e
+      # should be handled by the general scheduler
+      raise e
     rescue StandardError => e
       Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
       []
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
 
 require 'connectors/connector_status'
 require 'connectors/registry'
-require 'core/
+require 'core/filtering/post_process_engine'
+require 'core/ingestion'
+require 'core/filtering/validation_status'
 require 'utility'
 
 module Core
@@ -19,14 +21,18 @@ module Core
   end
 
   class SyncJobRunner
+    JOB_REPORTING_INTERVAL = 10
+
     def initialize(connector_settings)
       @connector_settings = connector_settings
-      @
+      @ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
       @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
-      @
+      @sync_finished = false
+      @sync_error = nil
       @status = {
         :indexed_document_count => 0,
         :deleted_document_count => 0,
+        :indexed_document_volume => 0,
         :error => nil
       }
     end
@@ -39,9 +45,12 @@ module Core
     private
 
     def do_sync!
-      Utility::Logger.info("
+      Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
 
-
+      job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
+      job_description = job_record['_source']
+      job_id = job_record['_id']
+      job_description['_id'] = job_id
 
       unless job_id.present?
         Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -51,18 +60,34 @@ module Core
       begin
         Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
 
-        @
+        Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
+        validate_filtering(job_description.dig(:connector, :filtering))
+        Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
+
+        connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
+
+        connector_instance.do_health_check!
 
         incoming_ids = []
         existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)
 
         Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
 
-
+        post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
+        reporting_cycle_start = Time.now
+        Utility::Logger.info('Yielding documents...')
+        connector_instance.yield_documents do |document|
           document = add_ingest_metadata(document)
-
-
-
+          post_process_result = post_processing_engine.process(document)
+          if post_process_result.is_include?
+            @ingester.ingest(document)
+            incoming_ids << document['id']
+          end
+
+          if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+            ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
+            reporting_cycle_start = Time.now
+          end
         end
 
         ids_to_delete = existing_ids - incoming_ids.uniq
@@ -70,23 +95,48 @@ module Core
         Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
 
         ids_to_delete.each do |id|
-          @
-
+          @ingester.delete(id)
+
+          if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+            ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
+            reporting_cycle_start = Time.now
+          end
         end
 
-        @
+        @ingester.flush
+
+        # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
+        # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
+        @sync_finished = true
       rescue StandardError => e
-        @
+        @sync_error = e.message
         Utility::ExceptionTracking.log_exception(e)
-        ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
       ensure
+        stats = @ingester.ingestion_stats
+
+        Utility::Logger.debug("Sync stats are: #{stats}")
+
+        @status[:indexed_document_count] = stats[:indexed_document_count]
+        @status[:deleted_document_count] = stats[:deleted_document_count]
+        @status[:indexed_document_volume] = stats[:indexed_document_volume]
+
         Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
         Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
 
-
+        # Make sure to not override a previous error message
+        if !@sync_finished && @sync_error.nil?
+          @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+        end
+
+        unless connector_instance.nil?
+          metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
+          metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
+        end
+
+        ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
 
-        if @
-        Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@
+        if @sync_error
+          Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
         else
           Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
         end
@@ -107,5 +157,15 @@ module Core
 
       raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
     end
+
+    def validate_filtering(filtering)
+      validation_result = @connector_class.validate_filtering(filtering)
+
+      wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+      raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
+
+      errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+      raise errors_present_error if validation_result[:errors].present?
+    end
   end
 end
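validate_filtering expects the connector class to return a hash with :state and :errors, the same shape ValidationJobRunner writes earlier in this diff. Illustrative results that would pass or raise (the error entry is made up for the example):

    # Passes: active filtering is valid and error-free.
    { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }

    # Raises Utility::InvalidFilterConfigError: state is not 'valid'.
    { :state => Core::Filtering::ValidationStatus::EDITED, :errors => [] }

    # Raises Utility::InvalidFilterConfigError: errors present despite a valid state.
    { :state => Core::Filtering::ValidationStatus::VALID,
      :errors => [{ :ids => ['rule-1'], :messages => ['example validation message'] }] }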
data/lib/core.rb
CHANGED
@@ -7,8 +7,10 @@
 # frozen_string_literal: true
 
 require 'core/configuration'
+require 'core/connector_job'
 require 'core/connector_settings'
 require 'core/elastic_connector_actions'
+require 'core/filtering'
 require 'core/heartbeat'
 require 'core/scheduler'
 require 'core/single_scheduler'
data/lib/utility/bulk_queue.rb ADDED
@@ -0,0 +1,85 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+require 'json'
+
+module Utility
+  class BulkQueue
+    class QueueOverflowError < StandardError; end
+
+    # 500 items or 5MB
+    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
+      @operation_count_threshold = operation_count_threshold.freeze
+      @size_threshold = size_threshold.freeze
+
+      @buffer = ''
+
+      @current_operation_count = 0
+
+      @current_buffer_size = 0
+      @current_data_size = 0
+    end
+
+    def pop_all
+      result = @buffer
+
+      reset
+
+      result
+    end
+
+    def add(operation, payload = nil)
+      raise QueueOverflowError unless will_fit?(operation, payload)
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_operation_count += 1
+      @current_buffer_size += operation_size
+      @current_buffer_size += payload_size
+      @current_data_size += payload_size
+
+      @buffer << operation
+      @buffer << "\n"
+
+      if payload
+        @buffer << payload
+        @buffer << "\n"
+      end
+    end
+
+    def will_fit?(operation, payload = nil)
+      return false if @current_operation_count + 1 > @operation_count_threshold
+
+      operation_size = get_size(operation)
+      payload_size = get_size(payload)
+
+      @current_buffer_size + operation_size + payload_size < @size_threshold
+    end
+
+    def current_stats
+      {
+        :current_operation_count => @current_operation_count,
+        :current_buffer_size => @current_buffer_size
+      }
+    end
+
+    private
+
+    def get_size(str)
+      return 0 unless str
+      str.bytesize
+    end
+
+    def reset
+      @current_operation_count = 0
+      @current_buffer_size = 0
+      @current_data_size = 0
+
+      @buffer = ''
+    end
+  end
+end
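BulkQueue accumulates newline-delimited operation/payload strings (the body of an Elasticsearch _bulk request) and enforces both an operation-count and a byte-size threshold; callers are expected to check will_fit? and flush first when it returns false, exactly as EsSink does above. A small usage sketch with deliberately tiny thresholds and made-up documents:

    queue = Utility::BulkQueue.new(2, 1024) # at most 2 operations or 1 KB per batch

    op      = '{"index":{"_index":"search-example-index","_id":"1"}}'
    payload = '{"id":"1","title":"Hello world"}'

    queue.add(op, payload) if queue.will_fit?(op, payload)
    queue.current_stats # => { :current_operation_count => 1, :current_buffer_size => ... }

    body = queue.pop_all # NDJSON string ready to send as a _bulk body; the queue is reset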
data/lib/utility/common.rb ADDED
@@ -0,0 +1,20 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+module Utility
+  class Common
+    class << self
+      def return_if_present(*args)
+        args.each do |arg|
+          return arg unless arg.nil?
+        end
+        nil
+      end
+    end
+  end
+end
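Utility::Common.return_if_present returns the first argument that is not nil; it checks only for nil, so empty strings and empty arrays pass through unchanged. For example:

    Utility::Common.return_if_present(nil, nil, 'fallback') # => 'fallback'
    Utility::Common.return_if_present(nil, '', 'fallback')  # => '' (only nil is skipped)
    Utility::Common.return_if_present(nil, nil)             # => nil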
data/lib/utility/constants.rb
CHANGED