connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -10
- data/lib/app/config.rb +2 -0
- data/lib/app/dispatcher.rb +17 -1
- data/lib/app/preflight_check.rb +15 -0
- data/lib/connectors/base/connector.rb +37 -4
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
- data/lib/connectors/example/connector.rb +43 -4
- data/lib/connectors/gitlab/connector.rb +16 -2
- data/lib/connectors/mongodb/connector.rb +173 -50
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/registry.rb +2 -2
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/configuration.rb +4 -2
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +29 -18
- data/lib/core/elastic_connector_actions.rb +331 -32
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/native_scheduler.rb +3 -0
- data/lib/core/scheduler.rb +43 -10
- data/lib/core/single_scheduler.rb +3 -0
- data/lib/core/sync_job_runner.rb +78 -18
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -3
- metadata +27 -18
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    DEFAULT_DOMAIN = 'DEFAULT'

    # Runs one filtering validation pass for a single connector: asks the
    # connector class to validate the connector's filtering and stores the
    # outcome through ElasticConnectorActions.update_filtering_validation.
    class ValidationJobRunner
      # @param connector_settings settings object; this class reads its
      #   #id, #service_type and #filtering.
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Validates the filtering and persists the result under DEFAULT_DOMAIN.
      #
      # Any StandardError raised on the way is logged, remembered in
      # @status[:error] and answered with an INVALID validation result, so the
      # error is not propagated to the caller unless persisting that fallback
      # result fails as well.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        # Remember the real cause; otherwise the ensure block below would report
        # the generic "did not finish" message for an ordinary exception.
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        validation_failed_result = {
          :state => Core::Filtering::ValidationStatus::INVALID,
          :errors => [
            { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
          ]
        }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # Only fall back to the generic message when nothing more specific
        # (including a blank exception message) has been recorded.
        if !@validation_finished && !@status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
|
@@ -4,10 +4,14 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Filtering
    # String states that a filtering validation object can carry in its
    # :state field.
    class ValidationStatus
      # Validation ran and the filtering was rejected (or validation itself failed).
      INVALID = 'invalid'

      # Validation ran and the filtering was accepted.
      VALID = 'valid'

      # Filtering has been changed and still awaits validation.
      EDITED = 'edited'
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering/post_process_engine'
|
10
|
+
require 'core/filtering/post_process_result'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
require 'core/filtering/validation_job_runner'
|
13
|
+
require 'core/filtering/validation_status'
|
14
|
+
|
15
|
+
module Core
  module Filtering
    # Domain key under which connector filtering validation results are stored.
    # core/filtering/validation_job_runner (required above) already assigns this
    # constant, so only define it when it is still missing; an unconditional
    # assignment makes Ruby emit an "already initialized constant" warning.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'app/config'
|
10
|
+
require 'utility/bulk_queue'
|
11
|
+
require 'utility/es_client'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'elasticsearch/api'
|
14
|
+
|
15
|
+
module Core
  module Ingestion
    # Sink that converts index/delete requests into Elasticsearch bulk action
    # lines, buffers them in a queue and sends the buffered body through the
    # client's bulk call when the queue cannot take more or on explicit flush.
    class EsSink
      # @param index_name target index written into every bulk action line
      # @param request_pipeline pipeline passed along with each bulk request
      # @param bulk_queue buffer responding to will_fit?, add and pop_all
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an index operation for an already serialized document.
      def ingest(id, serialized_document)
        enqueue(bulk_action('index', id), serialized_document)
      end

      # Queues a delete operation for the given document id.
      def delete(doc_id)
        enqueue(bulk_action('delete', doc_id))
      end

      # Sends everything currently buffered; does nothing for an empty buffer.
      def flush
        pending = @operation_queue.pop_all
        @client.bulk(:body => pending, :pipeline => @request_pipeline) unless pending.empty?
      end

      # Serializes an object with the Elasticsearch API serializer.
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      attr_accessor :index_name

      # Builds the serialized action line ("index"/"delete") for one document id.
      def bulk_action(verb, id)
        serialize({ verb => { '_index' => index_name, '_id' => id } })
      end

      # Flushes first when the queue reports the entry will not fit, then adds it.
      # The splat keeps the argument count identical for will_fit? and add.
      def enqueue(*entry)
        flush unless @operation_queue.will_fit?(*entry)

        @operation_queue.add(*entry)
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Ingestion
    # Front end for writing documents through a sink strategy. It skips empty
    # and oversized documents and keeps running counters of what was handed to
    # the sink.
    class Ingester
      # @param sink_strategy object responding to serialize, ingest, delete and flush
      # @param max_allowed_document_size maximum serialized size in bytes;
      #   a value of 0 or less disables the size check
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Serializes the document and passes it to the sink. Nil/empty documents
      # and documents above the size limit are logged and skipped.
      def ingest(document)
        unless document&.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        serialized_document = @sink_strategy.serialize(document)
        document_size = serialized_document.bytesize

        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], serialized_document)

        @ingested_count += 1
        @ingested_volume += document_size
      end

      # Ingests every document of the collection. A nil collection is treated
      # as "nothing to do", matching how #ingest treats a nil document.
      def ingest_multiple(documents)
        documents&.each { |doc| ingest(doc) }
      end

      # Passes a delete for the id to the sink; nil ids are ignored.
      def delete(id)
        return if id.nil?

        @sink_strategy.delete(id)

        @deleted_count += 1
      end

      # Deletes every id of the collection. A nil collection is treated as
      # "nothing to do", matching how #delete treats a nil id.
      def delete_multiple(ids)
        ids&.each { |id| delete(id) }
      end

      # Asks the sink to send whatever it has buffered.
      def flush
        @sink_strategy.flush
      end

      # @return [Hash] counters of documents handed to the sink so far
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end

      private

      # NOTE(review): the do_* hooks below are not called anywhere in this
      # class; they look like leftovers of a template-method design. Kept so
      # nothing outside this file that may rely on them breaks.

      def do_ingest(_id, _serialized_document)
        raise NotImplementedError
      end

      def do_delete(_id)
        raise NotImplementedError
      end

      def do_flush
        raise NotImplementedError
      end

      def do_serialize(_document)
        raise NotImplementedError
      end
    end
  end
end
|
@@ -6,8 +6,5 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'core/
|
10
|
-
require 'core/
|
11
|
-
require 'core/output_sink/combined_sink'
|
12
|
-
|
13
|
-
module Core::OutputSink; end
|
9
|
+
require 'core/ingestion/ingester'
|
10
|
+
require 'core/ingestion/es_sink'
|
@@ -16,6 +16,9 @@ module Core
|
|
16
16
|
class NativeScheduler < Core::Scheduler
|
17
17
|
def connector_settings
|
18
18
|
Core::ConnectorSettings.fetch_native_connectors || []
|
19
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
20
|
+
# should be handled by the general scheduler
|
21
|
+
raise e
|
19
22
|
rescue StandardError => e
|
20
23
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve native connectors due to unexpected error.')
|
21
24
|
[]
|
data/lib/core/scheduler.rb
CHANGED
@@ -9,6 +9,8 @@
|
|
9
9
|
require 'time'
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility/cron'
|
13
15
|
require 'utility/logger'
|
14
16
|
require 'utility/exception_tracking'
|
@@ -37,13 +39,18 @@ module Core
|
|
37
39
|
if configuration_triggered?(cs)
|
38
40
|
yield cs, :configuration
|
39
41
|
end
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
40
45
|
end
|
41
|
-
|
42
|
-
|
43
|
-
end
|
46
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
47
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
44
48
|
rescue StandardError => e
|
45
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
46
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
47
54
|
if @poll_interval > 0 && !@is_shutting_down
|
48
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
49
56
|
sleep(@poll_interval)
|
@@ -59,8 +66,6 @@ module Core
|
|
59
66
|
private
|
60
67
|
|
61
68
|
def sync_triggered?(connector_settings)
|
62
|
-
return false unless connector_registered?(connector_settings.service_type)
|
63
|
-
|
64
69
|
unless connector_settings.valid_index_name?
|
65
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
66
71
|
return false
|
@@ -126,8 +131,6 @@ module Core
|
|
126
131
|
end
|
127
132
|
|
128
133
|
def heartbeat_triggered?(connector_settings)
|
129
|
-
return false unless connector_registered?(connector_settings.service_type)
|
130
|
-
|
131
134
|
last_seen = connector_settings[:last_seen]
|
132
135
|
return true if last_seen.nil? || last_seen.empty?
|
133
136
|
last_seen = begin
|
@@ -141,11 +144,41 @@ module Core
|
|
141
144
|
end
|
142
145
|
|
143
146
|
def configuration_triggered?(connector_settings)
|
144
|
-
|
145
|
-
|
147
|
+
connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
|
148
|
+
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
146
157
|
end
|
147
158
|
|
148
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
149
182
|
end
|
150
183
|
|
151
184
|
def connector_registered?(service_type)
|
@@ -21,6 +21,9 @@ module Core
|
|
21
21
|
def connector_settings
|
22
22
|
connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
|
23
23
|
[connector_settings]
|
24
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
25
|
+
# should be handled by the general scheduler
|
26
|
+
raise e
|
24
27
|
rescue StandardError => e
|
25
28
|
Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
|
26
29
|
[]
|
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
-
require 'core/
|
11
|
+
require 'core/filtering/post_process_engine'
|
12
|
+
require 'core/ingestion'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility'
|
13
15
|
|
14
16
|
module Core
|
@@ -19,14 +21,18 @@ module Core
|
|
19
21
|
end
|
20
22
|
|
21
23
|
class SyncJobRunner
|
24
|
+
JOB_REPORTING_INTERVAL = 10
|
25
|
+
|
22
26
|
def initialize(connector_settings)
|
23
27
|
@connector_settings = connector_settings
|
24
|
-
@
|
28
|
+
@ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
|
25
29
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
26
|
-
@
|
30
|
+
@sync_finished = false
|
31
|
+
@sync_error = nil
|
27
32
|
@status = {
|
28
33
|
:indexed_document_count => 0,
|
29
34
|
:deleted_document_count => 0,
|
35
|
+
:indexed_document_volume => 0,
|
30
36
|
:error => nil
|
31
37
|
}
|
32
38
|
end
|
@@ -39,9 +45,12 @@ module Core
|
|
39
45
|
private
|
40
46
|
|
41
47
|
def do_sync!
|
42
|
-
Utility::Logger.info("
|
48
|
+
Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
|
43
49
|
|
44
|
-
|
50
|
+
job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
|
51
|
+
job_description = job_record['_source']
|
52
|
+
job_id = job_record['_id']
|
53
|
+
job_description['_id'] = job_id
|
45
54
|
|
46
55
|
unless job_id.present?
|
47
56
|
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
@@ -51,18 +60,34 @@ module Core
|
|
51
60
|
begin
|
52
61
|
Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
|
53
62
|
|
54
|
-
@
|
63
|
+
Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
|
64
|
+
validate_filtering(job_description.dig(:connector, :filtering))
|
65
|
+
Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
|
66
|
+
|
67
|
+
connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
|
68
|
+
|
69
|
+
connector_instance.do_health_check!
|
55
70
|
|
56
71
|
incoming_ids = []
|
57
72
|
existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)
|
58
73
|
|
59
74
|
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
60
75
|
|
61
|
-
|
76
|
+
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
77
|
+
reporting_cycle_start = Time.now
|
78
|
+
Utility::Logger.info('Yielding documents...')
|
79
|
+
connector_instance.yield_documents do |document|
|
62
80
|
document = add_ingest_metadata(document)
|
63
|
-
|
64
|
-
|
65
|
-
|
81
|
+
post_process_result = post_processing_engine.process(document)
|
82
|
+
if post_process_result.is_include?
|
83
|
+
@ingester.ingest(document)
|
84
|
+
incoming_ids << document['id']
|
85
|
+
end
|
86
|
+
|
87
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
88
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
89
|
+
reporting_cycle_start = Time.now
|
90
|
+
end
|
66
91
|
end
|
67
92
|
|
68
93
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
@@ -70,23 +95,48 @@ module Core
|
|
70
95
|
Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
|
71
96
|
|
72
97
|
ids_to_delete.each do |id|
|
73
|
-
@
|
74
|
-
|
98
|
+
@ingester.delete(id)
|
99
|
+
|
100
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
101
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
102
|
+
reporting_cycle_start = Time.now
|
103
|
+
end
|
75
104
|
end
|
76
105
|
|
77
|
-
@
|
106
|
+
@ingester.flush
|
107
|
+
|
108
|
+
# We use this mechanism for checking whether an interrupt (or something else that led to the thread not finishing)
|
109
|
+
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
110
|
+
@sync_finished = true
|
78
111
|
rescue StandardError => e
|
79
|
-
@
|
112
|
+
@sync_error = e.message
|
80
113
|
Utility::ExceptionTracking.log_exception(e)
|
81
|
-
ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
|
82
114
|
ensure
|
115
|
+
stats = @ingester.ingestion_stats
|
116
|
+
|
117
|
+
Utility::Logger.debug("Sync stats are: #{stats}")
|
118
|
+
|
119
|
+
@status[:indexed_document_count] = stats[:indexed_document_count]
|
120
|
+
@status[:deleted_document_count] = stats[:deleted_document_count]
|
121
|
+
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
122
|
+
|
83
123
|
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
84
124
|
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
85
125
|
|
86
|
-
|
126
|
+
# Make sure to not override a previous error message
|
127
|
+
if !@sync_finished && @sync_error.nil?
|
128
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
129
|
+
end
|
130
|
+
|
131
|
+
unless connector_instance.nil?
|
132
|
+
metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
|
133
|
+
metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
|
134
|
+
end
|
135
|
+
|
136
|
+
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
|
87
137
|
|
88
|
-
if @
|
89
|
-
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@
|
138
|
+
if @sync_error
|
139
|
+
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
|
90
140
|
else
|
91
141
|
Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
|
92
142
|
end
|
@@ -107,5 +157,15 @@ module Core
|
|
107
157
|
|
108
158
|
raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
109
159
|
end
|
160
|
+
|
161
|
+
def validate_filtering(filtering)
|
162
|
+
validation_result = @connector_class.validate_filtering(filtering)
|
163
|
+
|
164
|
+
wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
165
|
+
raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
|
166
|
+
|
167
|
+
errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
168
|
+
raise errors_present_error if validation_result[:errors].present?
|
169
|
+
end
|
110
170
|
end
|
111
171
|
end
|
data/lib/core.rb
CHANGED
@@ -7,8 +7,10 @@
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
9
|
require 'core/configuration'
|
10
|
+
require 'core/connector_job'
|
10
11
|
require 'core/connector_settings'
|
11
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering'
|
12
14
|
require 'core/heartbeat'
|
13
15
|
require 'core/scheduler'
|
14
16
|
require 'core/single_scheduler'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
module Utility
  # Accumulates bulk operations as newline-delimited text until either an
  # operation-count limit or a byte-size limit would be exceeded.
  #
  # Size accounting covers the operation and payload bytes only; the newline
  # separators appended to the buffer are not counted.
  class BulkQueue
    # Raised by #add when the entry does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # 500 items or 5MB
    #
    # @param operation_count_threshold maximum number of operations held at once
    # @param size_threshold tracked byte size must stay strictly below this value
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns the buffered text and leaves the queue empty.
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, when given, its payload line.
    #
    # @raise [QueueOverflowError] when #will_fit? is false for the entry
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      payload_size = get_size(payload)

      @current_operation_count += 1
      @current_buffer_size += get_size(operation) + payload_size
      @current_data_size += payload_size

      @buffer << operation << "\n"
      @buffer << payload << "\n" if payload
    end

    # Tells whether one more entry stays within both limits.
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + get_size(operation) + get_size(payload) < @size_threshold
    end

    # @return [Hash] current operation count and tracked buffer size
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of a string; nil (and false) count as 0.
    def get_size(str)
      return 0 unless str

      str.bytesize
    end

    # Zeroes all counters and starts a fresh buffer.
    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # Unary plus yields a mutable string even when string literals are frozen.
      @buffer = +''
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  # Small general-purpose helpers.
  class Common
    class << self
      # Returns the first argument that is not nil, or nil when every argument
      # is nil or no arguments are given. Only nil is skipped: false, empty
      # strings and empty collections are returned as they are.
      def return_if_present(*args)
        args.find { |arg| !arg.nil? }
      end
    end
  end
end
|
data/lib/utility/constants.rb
CHANGED