connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/dispatcher.rb +12 -0
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/connector.rb +19 -12
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +15 -0
- data/lib/connectors/gitlab/connector.rb +15 -1
- data/lib/connectors/mongodb/connector.rb +55 -36
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/sync_job_runner.rb +65 -17
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +16 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    DEFAULT_DOMAIN = 'DEFAULT'

    # Validates the filtering of a single connector with the connector class
    # registered for its service type and reports the outcome through
    # ElasticConnectorActions.update_filtering_validation.
    class ValidationJobRunner
      # @param connector_settings settings of the connector to validate; this
      #   class reads +id+, +service_type+ and +filtering+ from it.
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Runs the validation once.
      #
      # A StandardError raised while validating (or while storing the result)
      # is logged, remembered in @status[:error] and reported as an INVALID
      # validation result. It is not re-raised.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        # Remember the real cause first, so the summary written in `ensure`
        # names it instead of the generic "did not finish" message - even if
        # reporting the failure below raises as well.
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        validation_failed_result = {
          :state => Core::Filtering::ValidationStatus::INVALID,
          :errors => [
            { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
          ]
        }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # Reached without a recorded error only when something that is not a
        # StandardError (e.g. an interrupt) stopped the validation.
        if !@validation_finished && !@status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Filtering
    # String values used for the +state+ of a filtering validation.
    class ValidationStatus
      # State the scheduler looks for on draft filtering before it triggers a validation.
      EDITED = 'edited'
      # State the sync job runner requires before it syncs.
      VALID = 'valid'
      # State the validation job runner reports when validating raised an error.
      INVALID = 'invalid'
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering/post_process_engine'
|
10
|
+
require 'core/filtering/post_process_result'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
require 'core/filtering/validation_job_runner'
|
13
|
+
require 'core/filtering/validation_status'
|
14
|
+
|
15
|
+
module Core
  module Filtering
    # Key under which a filtering validation result is stored.
    # 'core/filtering/validation_job_runner' (required at the top of this file)
    # already defines the same constant, so only assign it when it is missing;
    # assigning it a second time makes Ruby warn about an already initialized
    # constant.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'app/config'
|
10
|
+
require 'utility/bulk_queue'
|
11
|
+
require 'utility/es_client'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'elasticsearch/api'
|
14
|
+
|
15
|
+
module Core
  module Ingestion
    # Sink that turns index/delete requests into bulk action lines, buffers
    # them in a queue and sends the buffered lines to Elasticsearch through the
    # client's +bulk+ call.
    class EsSink
      # @param index_name name of the index the operations target
      # @param request_pipeline value passed as +:pipeline+ with every bulk request
      # @param bulk_queue buffer responding to +will_fit?+, +add+ and +pop_all+
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an index operation for an already serialized document.
      #
      # @param id document id
      # @param serialized_document [String] document as produced by #serialize
      def ingest(id, serialized_document)
        index_op = serialize({ 'index' => { '_index' => index_name, '_id' => id } })

        enqueue(index_op, serialized_document)
      end

      # Queues a delete operation for the given document id.
      def delete(doc_id)
        delete_op = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })

        enqueue(delete_op)
      end

      # Sends everything that is currently buffered; does nothing when the
      # buffer is empty.
      def flush
        data = @operation_queue.pop_all
        return if data.empty?

        send_bulk(data)
      end

      # Serializes +obj+ with the serializer of the Elasticsearch API
      # (presumably to JSON - confirm against the configured serializer).
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      attr_accessor :index_name

      # Buffers one operation (action line plus optional payload line).
      # The queue is flushed first when the operation does not fit. If it does
      # not fit even after flushing, the operation is sent as its own bulk
      # request instead of letting the queue raise an overflow error, which
      # would abort the caller for a single large document.
      def enqueue(*lines)
        unless @operation_queue.will_fit?(*lines)
          flush

          unless @operation_queue.will_fit?(*lines)
            send_bulk(lines.map { |line| "#{line}\n" }.join)
            return
          end
        end

        @operation_queue.add(*lines)
      end

      # Issues one bulk request with the given newline-delimited body.
      def send_bulk(body)
        @client.bulk(:body => body, :pipeline => @request_pipeline)
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Ingestion
    # Front end for document ingestion: serializes documents with the sink
    # strategy, skips empty and oversized documents, forwards the remaining
    # work to the sink strategy and keeps counters about what was forwarded.
    class Ingester
      # @param sink_strategy object responding to +serialize+, +ingest+,
      #   +delete+ and +flush+
      # @param max_allowed_document_size [Integer] maximum size in bytes of a
      #   serialized document; a value of 0 or less disables the check
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Forwards one document to the sink. Nil/empty documents and documents
      # whose serialized form exceeds the size limit are skipped with a warning
      # and are not counted.
      def ingest(document)
        unless document&.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        serialized_document = @sink_strategy.serialize(document)
        document_size = serialized_document.bytesize

        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], serialized_document)

        @ingested_count += 1
        @ingested_volume += document_size
      end

      # Ingests every document of the collection; a nil collection is ignored,
      # in line with how #ingest and #delete treat nil.
      def ingest_multiple(documents)
        documents&.each { |doc| ingest(doc) }
      end

      # Forwards the deletion of one document id to the sink; nil ids are ignored.
      def delete(id)
        return if id.nil?

        @sink_strategy.delete(id)

        @deleted_count += 1
      end

      # Deletes every id of the collection; a nil collection is ignored.
      def delete_multiple(ids)
        ids&.each { |id| delete(id) }
      end

      # Asks the sink to flush.
      def flush
        @sink_strategy.flush
      end

      # @return [Hash] counters of the operations forwarded to the sink so far
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end

      # The private do_ingest/do_delete/do_flush/do_serialize stubs were
      # removed: nothing in this class called them, as all work is delegated to
      # the sink strategy.
    end
  end
end
|
@@ -6,8 +6,5 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'core/
|
10
|
-
require 'core/
|
11
|
-
require 'core/output_sink/combined_sink'
|
12
|
-
|
13
|
-
module Core::OutputSink; end
|
9
|
+
require 'core/ingestion/ingester'
|
10
|
+
require 'core/ingestion/es_sink'
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
# Tells whether the connector needs a configuration step: either it still
# needs a service type, or its status is CREATED.
def configuration_triggered?(connector_settings)
  missing_service_type = connector_settings.needs_service_type?
  return missing_service_type if missing_service_type

  connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
end
|
149
|
+
|
150
|
+
# Tells whether the draft filtering of the connector should be validated now.
# That is the case only when filtering, a draft filter and a validation object
# are all present and the validation state is EDITED. Every other outcome is
# logged together with the formatted connector settings and yields false.
def filtering_validation_triggered?(connector_settings)
  # Logs why the validation is skipped and evaluates to false.
  skip = lambda do |level, reason|
    Utility::Logger.public_send(level, "#{connector_settings.formatted} #{reason}")
    false
  end

  filtering = connector_settings.filtering
  return skip.call(:debug, 'does not contain filtering to be validated.') unless filtering.present?

  draft = filtering[:draft]
  return skip.call(:debug, 'does not contain a draft filter to be validated.') unless draft.present?

  validation = draft[:validation]
  return skip.call(:warn, 'does not contain a validation object inside draft filtering. Check connectors index.') unless validation.present?

  expected_state = Core::Filtering::ValidationStatus::EDITED
  return true if validation[:state] == expected_state

  skip.call(:debug, "filtering validation needs to be in state #{expected_state} to be able to validate it.")
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
-
require 'core/
|
11
|
+
require 'core/filtering/post_process_engine'
|
12
|
+
require 'core/ingestion'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility'
|
13
15
|
|
14
16
|
module Core
|
@@ -19,14 +21,18 @@ module Core
|
|
19
21
|
end
|
20
22
|
|
21
23
|
class SyncJobRunner
|
24
|
+
JOB_REPORTING_INTERVAL = 10
|
25
|
+
|
22
26
|
def initialize(connector_settings)
|
23
27
|
@connector_settings = connector_settings
|
24
|
-
@
|
28
|
+
@ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
|
25
29
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
26
30
|
@sync_finished = false
|
31
|
+
@sync_error = nil
|
27
32
|
@status = {
|
28
33
|
:indexed_document_count => 0,
|
29
34
|
:deleted_document_count => 0,
|
35
|
+
:indexed_document_volume => 0,
|
30
36
|
:error => nil
|
31
37
|
}
|
32
38
|
end
|
@@ -41,8 +47,10 @@ module Core
|
|
41
47
|
def do_sync!
|
42
48
|
Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
|
43
49
|
|
44
|
-
|
45
|
-
|
50
|
+
job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
|
51
|
+
job_description = job_record['_source']
|
52
|
+
job_id = job_record['_id']
|
53
|
+
job_description['_id'] = job_id
|
46
54
|
|
47
55
|
unless job_id.present?
|
48
56
|
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
@@ -52,6 +60,10 @@ module Core
|
|
52
60
|
begin
|
53
61
|
Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
|
54
62
|
|
63
|
+
Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
|
64
|
+
validate_filtering(job_description.dig(:connector, :filtering))
|
65
|
+
Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
|
66
|
+
|
55
67
|
connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
|
56
68
|
|
57
69
|
connector_instance.do_health_check!
|
@@ -61,11 +73,21 @@ module Core
|
|
61
73
|
|
62
74
|
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
63
75
|
|
76
|
+
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
77
|
+
reporting_cycle_start = Time.now
|
78
|
+
Utility::Logger.info('Yielding documents...')
|
64
79
|
connector_instance.yield_documents do |document|
|
65
80
|
document = add_ingest_metadata(document)
|
66
|
-
|
67
|
-
|
68
|
-
|
81
|
+
post_process_result = post_processing_engine.process(document)
|
82
|
+
if post_process_result.is_include?
|
83
|
+
@ingester.ingest(document)
|
84
|
+
incoming_ids << document['id']
|
85
|
+
end
|
86
|
+
|
87
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
88
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
89
|
+
reporting_cycle_start = Time.now
|
90
|
+
end
|
69
91
|
end
|
70
92
|
|
71
93
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
@@ -73,32 +95,48 @@ module Core
|
|
73
95
|
Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
|
74
96
|
|
75
97
|
ids_to_delete.each do |id|
|
76
|
-
@
|
77
|
-
|
98
|
+
@ingester.delete(id)
|
99
|
+
|
100
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
101
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
102
|
+
reporting_cycle_start = Time.now
|
103
|
+
end
|
78
104
|
end
|
79
105
|
|
80
|
-
@
|
106
|
+
@ingester.flush
|
81
107
|
|
82
108
|
# We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
|
83
109
|
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
84
110
|
@sync_finished = true
|
85
111
|
rescue StandardError => e
|
86
|
-
@
|
112
|
+
@sync_error = e.message
|
87
113
|
Utility::ExceptionTracking.log_exception(e)
|
88
|
-
ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
|
89
114
|
ensure
|
115
|
+
stats = @ingester.ingestion_stats
|
116
|
+
|
117
|
+
Utility::Logger.debug("Sync stats are: #{stats}")
|
118
|
+
|
119
|
+
@status[:indexed_document_count] = stats[:indexed_document_count]
|
120
|
+
@status[:deleted_document_count] = stats[:deleted_document_count]
|
121
|
+
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
122
|
+
|
90
123
|
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
91
124
|
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
92
125
|
|
93
126
|
# Make sure to not override a previous error message
|
94
|
-
if !@sync_finished && @
|
95
|
-
@
|
127
|
+
if !@sync_finished && @sync_error.nil?
|
128
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
96
129
|
end
|
97
130
|
|
98
|
-
|
131
|
+
unless connector_instance.nil?
|
132
|
+
metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
|
133
|
+
metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
|
134
|
+
end
|
135
|
+
|
136
|
+
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
|
99
137
|
|
100
|
-
if @
|
101
|
-
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@
|
138
|
+
if @sync_error
|
139
|
+
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
|
102
140
|
else
|
103
141
|
Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
|
104
142
|
end
|
@@ -119,5 +157,15 @@ module Core
|
|
119
157
|
|
120
158
|
raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
121
159
|
end
|
160
|
+
|
161
|
+
# Validates the given filtering with the connector class and raises unless the
# result is in VALID state and carries no errors.
#
# @param filtering filtering handed to the connector class for validation
# @raise [Utility::InvalidFilterConfigError] when the state is not VALID, or
#   when the state is VALID but errors are present
def validate_filtering(filtering)
  validation_result = @connector_class.validate_filtering(filtering)

  # The error objects (and their interpolated messages) are only built on the
  # path that actually raises.
  state = validation_result[:state]
  unless state == Core::Filtering::ValidationStatus::VALID
    raise Utility::InvalidFilterConfigError, "Active filtering is not in valid state (current state: #{state}) for connector #{@connector_settings.id}. Please check active filtering in connectors index."
  end

  errors = validation_result[:errors]
  return unless errors.present?

  raise Utility::InvalidFilterConfigError, "Active filtering is in valid state, but errors were detected (errors: #{errors}) for connector #{@connector_settings.id}. Please check active filtering in connectors index."
end
|
122
170
|
end
|
123
171
|
end
|
data/lib/core.rb
CHANGED
@@ -7,8 +7,10 @@
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
9
|
require 'core/configuration'
|
10
|
+
require 'core/connector_job'
|
10
11
|
require 'core/connector_settings'
|
11
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering'
|
12
14
|
require 'core/heartbeat'
|
13
15
|
require 'core/scheduler'
|
14
16
|
require 'core/single_scheduler'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
module Utility
  # In-memory buffer for newline-delimited bulk operations. Every operation is
  # one action line, optionally followed by one payload line. The buffer is
  # limited by a number of operations and by a size in bytes.
  class BulkQueue
    class QueueOverflowError < StandardError; end

    # Terminator appended after every buffered line.
    LINE_TERMINATOR = "\n"

    # Defaults: 500 items or 5MB
    #
    # @param operation_count_threshold [Integer] maximum number of buffered operations
    # @param size_threshold [Integer] buffered bytes must stay below this value
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns the buffered text and starts over with an empty buffer.
    #
    # @return [String] everything buffered since the last call
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation and its optional payload to the buffer.
    #
    # @param operation [String] serialized action line
    # @param payload [String, nil] serialized document line, if any
    # @raise [QueueOverflowError] when the operation does not fit (see #will_fit?)
    def add(operation, payload = nil)
      operation_size = line_size(operation)
      payload_size = line_size(payload)

      unless fits?(operation_size + payload_size)
        raise QueueOverflowError, "Operation of #{operation_size + payload_size} bytes does not fit into the queue (#{@current_operation_count}/#{@operation_count_threshold} operations, #{@current_buffer_size}/#{@size_threshold} bytes)."
      end

      @current_operation_count += 1
      @current_buffer_size += operation_size + payload_size
      # payload bytes only, without the line terminator
      @current_data_size += payload.bytesize if payload

      @buffer << operation << LINE_TERMINATOR
      @buffer << payload << LINE_TERMINATOR if payload
    end

    # Tells whether one more operation can be added without exceeding the
    # operation count threshold or reaching the size threshold. The size
    # includes the line terminators that #add appends, so it matches the bytes
    # that really end up in the buffer.
    def will_fit?(operation, payload = nil)
      fits?(line_size(operation) + line_size(payload))
    end

    # @return [Hash] number of buffered operations and buffered bytes
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Bytes a line occupies in the buffer, terminator included; 0 for nil.
    def line_size(str)
      return 0 unless str

      str.bytesize + LINE_TERMINATOR.bytesize
    end

    # Checks both thresholds for an operation that adds +additional_bytes+.
    def fits?(additional_bytes)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + additional_bytes < @size_threshold
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # unary plus gives a mutable string even under frozen string literals
      @buffer = +''
    end
  end
end
|
data/lib/utility/constants.rb
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  # Helper for reading filtering configuration.
  class Filtering
    # Picks the filter object out of +filtering+.
    #
    # For now the first element of a filtering array, or a filter object passed
    # directly, is assumed to be the only filtering object.
    #
    # @param filtering an array of filter objects, a single filter object or nil
    # @return the filter object, or an empty Hash when nothing is present
    def self.extract_filter(filtering)
      no_filter = {}
      return no_filter unless filtering.present?

      candidate =
        case filtering
        when Array then filtering.first
        else filtering
        end

      return no_filter unless candidate.present?

      candidate
    end
  end
end
|
data/lib/utility/logger.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'config'
|
7
8
|
require 'logger'
|
8
9
|
require 'active_support/core_ext/module'
|
9
10
|
require 'active_support/core_ext/string/filters'
|
@@ -23,7 +24,7 @@ module Utility
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
28
|
end
|
28
29
|
|
29
30
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,15 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'utility/bulk_queue'
|
8
|
+
require 'utility/common'
|
7
9
|
require 'utility/constants'
|
8
10
|
require 'utility/cron'
|
9
|
-
require 'utility/
|
11
|
+
require 'utility/elasticsearch/index/mappings'
|
12
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
|
+
require 'utility/environment'
|
10
14
|
require 'utility/errors'
|
11
15
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
16
|
require 'utility/exception_tracking'
|
14
17
|
require 'utility/extension_mapping_util'
|
15
18
|
require 'utility/logger'
|
16
|
-
require 'utility/elasticsearch/index/mappings'
|
17
|
-
require 'utility/elasticsearch/index/text_analysis_settings'
|