connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/dispatcher.rb +12 -0
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/connector.rb +19 -12
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +15 -0
- data/lib/connectors/gitlab/connector.rb +15 -1
- data/lib/connectors/mongodb/connector.rb +55 -36
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/sync_job_runner.rb +65 -17
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +16 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    # Domain key under which validation results are reported.
    # NOTE(review): also defined (same value) in core/filtering.rb — consider
    # consolidating to avoid a constant-redefinition warning at load time.
    DEFAULT_DOMAIN = 'DEFAULT'

    # Runs a one-off filtering validation job for a connector: asks the
    # connector class to validate the connector's filtering rules and writes
    # the outcome back via ElasticConnectorActions.
    class ValidationJobRunner
      def initialize(connector_settings)
        @connector_settings = connector_settings
        # Resolve the concrete connector implementation for this service type.
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Executes the validation and persists the result. Any StandardError is
      # caught and reported as an INVALID result; this method does not raise.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e)
        # Persist a generic INVALID result so the index reflects the failure.
        validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
                                     :errors => [
                                       { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
                                     ] }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # If the happy path did not complete (exception or interrupt), record
        # an error so the summary log below reports a failure instead of success.
        if !@validation_finished && !@status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Filtering
    # The states a filtering validation can be in, as persisted in the
    # connectors index.
    class ValidationStatus
      # The draft filtering was edited and still awaits validation.
      EDITED = 'edited'
      # Validation completed successfully.
      VALID = 'valid'
      # Validation completed with errors.
      INVALID = 'invalid'
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering/post_process_engine'
|
10
|
+
require 'core/filtering/post_process_result'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
require 'core/filtering/validation_job_runner'
|
13
|
+
require 'core/filtering/validation_status'
|
14
|
+
|
15
|
+
# Namespace-level constants for filtering. The requires above pull in all
# filtering-related classes, so `require 'core/filtering'` is the single
# entry point for this namespace.
module Core::Filtering
  # Validation results are keyed by domain; connectors currently only use
  # the DEFAULT domain.
  DEFAULT_DOMAIN = 'DEFAULT'
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'app/config'
|
10
|
+
require 'utility/bulk_queue'
|
11
|
+
require 'utility/es_client'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'elasticsearch/api'
|
14
|
+
|
15
|
+
module Core
  module Ingestion
    # Elasticsearch-backed sink: buffers index/delete operations in a bulk
    # queue and ships them with the Bulk API whenever the buffer would
    # overflow or +flush+ is called explicitly.
    class EsSink
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an upsert of +serialized_document+ under +id+, flushing the
      # buffer first if the new operation would not fit.
      def ingest(id, serialized_document)
        operation = serialize({ 'index' => { '_index' => index_name, '_id' => id } })
        flush unless @operation_queue.will_fit?(operation, serialized_document)
        @operation_queue.add(operation, serialized_document)
      end

      # Queues a delete of +doc_id+, flushing the buffer first if needed.
      def delete(doc_id)
        operation = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })
        flush unless @operation_queue.will_fit?(operation)
        @operation_queue.add(operation)
      end

      # Sends everything currently buffered to Elasticsearch. No-op when the
      # buffer is empty.
      def flush
        payload = @operation_queue.pop_all
        return if payload.empty?

        @client.bulk(:body => payload, :pipeline => @request_pipeline)
      end

      # Serializes +obj+ with the same serializer the Elasticsearch client
      # uses, so the bulk payload matches what the client expects.
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      attr_accessor :index_name
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Ingestion
    # Tracks ingestion statistics while delegating actual document transport
    # to a pluggable sink strategy (e.g. Core::Ingestion::EsSink). The sink
    # must respond to +serialize+, +ingest+, +delete+ and +flush+.
    #
    # Fix: removed the private do_ingest/do_delete/do_flush/do_serialize
    # methods — they always raised NotImplementedError and were never called
    # (dead leftovers from the removed OutputSink base class).
    class Ingester
      # @param sink_strategy the sink documents are forwarded to
      # @param max_allowed_document_size [Integer] per-document byte limit;
      #   larger documents are skipped with a warning. <= 0 disables the limit.
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Serializes +document+ and forwards it to the sink. Empty and oversized
      # documents are skipped with a warning instead of raising.
      def ingest(document)
        unless document&.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        serialized_document = @sink_strategy.serialize(document)
        document_size = serialized_document.bytesize

        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], serialized_document)

        @ingested_count += 1
        @ingested_volume += document_size
      end

      def ingest_multiple(documents)
        documents.each { |doc| ingest(doc) }
      end

      # Forwards a delete for +id+ to the sink; nil ids are ignored.
      def delete(id)
        return if id.nil?

        @sink_strategy.delete(id)

        @deleted_count += 1
      end

      def delete_multiple(ids)
        ids.each { |id| delete(id) }
      end

      # Flushes anything the sink may still be buffering.
      def flush
        @sink_strategy.flush
      end

      # Counters for job progress reporting.
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end
    end
  end
end
|
@@ -6,8 +6,5 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'core/
|
10
|
-
require 'core/
|
11
|
-
require 'core/output_sink/combined_sink'
|
12
|
-
|
13
|
-
module Core::OutputSink; end
|
9
|
+
require 'core/ingestion/ingester'
|
10
|
+
require 'core/ingestion/es_sink'
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
def configuration_triggered?(connector_settings)
|
147
|
-
|
148
|
-
|
147
|
+
connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
|
148
|
+
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
149
157
|
end
|
150
158
|
|
151
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
152
182
|
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
data/lib/core/sync_job_runner.rb
CHANGED
@@ -8,7 +8,9 @@
|
|
8
8
|
|
9
9
|
require 'connectors/connector_status'
|
10
10
|
require 'connectors/registry'
|
11
|
-
require 'core/
|
11
|
+
require 'core/filtering/post_process_engine'
|
12
|
+
require 'core/ingestion'
|
13
|
+
require 'core/filtering/validation_status'
|
12
14
|
require 'utility'
|
13
15
|
|
14
16
|
module Core
|
@@ -19,14 +21,18 @@ module Core
|
|
19
21
|
end
|
20
22
|
|
21
23
|
class SyncJobRunner
|
24
|
+
JOB_REPORTING_INTERVAL = 10
|
25
|
+
|
22
26
|
def initialize(connector_settings)
|
23
27
|
@connector_settings = connector_settings
|
24
|
-
@
|
28
|
+
@ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
|
25
29
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
26
30
|
@sync_finished = false
|
31
|
+
@sync_error = nil
|
27
32
|
@status = {
|
28
33
|
:indexed_document_count => 0,
|
29
34
|
:deleted_document_count => 0,
|
35
|
+
:indexed_document_volume => 0,
|
30
36
|
:error => nil
|
31
37
|
}
|
32
38
|
end
|
@@ -41,8 +47,10 @@ module Core
|
|
41
47
|
def do_sync!
|
42
48
|
Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
|
43
49
|
|
44
|
-
|
45
|
-
|
50
|
+
job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
|
51
|
+
job_description = job_record['_source']
|
52
|
+
job_id = job_record['_id']
|
53
|
+
job_description['_id'] = job_id
|
46
54
|
|
47
55
|
unless job_id.present?
|
48
56
|
Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
|
@@ -52,6 +60,10 @@ module Core
|
|
52
60
|
begin
|
53
61
|
Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
|
54
62
|
|
63
|
+
Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
|
64
|
+
validate_filtering(job_description.dig(:connector, :filtering))
|
65
|
+
Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
|
66
|
+
|
55
67
|
connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
|
56
68
|
|
57
69
|
connector_instance.do_health_check!
|
@@ -61,11 +73,21 @@ module Core
|
|
61
73
|
|
62
74
|
Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
|
63
75
|
|
76
|
+
post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
|
77
|
+
reporting_cycle_start = Time.now
|
78
|
+
Utility::Logger.info('Yielding documents...')
|
64
79
|
connector_instance.yield_documents do |document|
|
65
80
|
document = add_ingest_metadata(document)
|
66
|
-
|
67
|
-
|
68
|
-
|
81
|
+
post_process_result = post_processing_engine.process(document)
|
82
|
+
if post_process_result.is_include?
|
83
|
+
@ingester.ingest(document)
|
84
|
+
incoming_ids << document['id']
|
85
|
+
end
|
86
|
+
|
87
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
88
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
89
|
+
reporting_cycle_start = Time.now
|
90
|
+
end
|
69
91
|
end
|
70
92
|
|
71
93
|
ids_to_delete = existing_ids - incoming_ids.uniq
|
@@ -73,32 +95,48 @@ module Core
|
|
73
95
|
Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
|
74
96
|
|
75
97
|
ids_to_delete.each do |id|
|
76
|
-
@
|
77
|
-
|
98
|
+
@ingester.delete(id)
|
99
|
+
|
100
|
+
if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
|
101
|
+
ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
|
102
|
+
reporting_cycle_start = Time.now
|
103
|
+
end
|
78
104
|
end
|
79
105
|
|
80
|
-
@
|
106
|
+
@ingester.flush
|
81
107
|
|
82
108
|
# We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
|
83
109
|
# occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
|
84
110
|
@sync_finished = true
|
85
111
|
rescue StandardError => e
|
86
|
-
@
|
112
|
+
@sync_error = e.message
|
87
113
|
Utility::ExceptionTracking.log_exception(e)
|
88
|
-
ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
|
89
114
|
ensure
|
115
|
+
stats = @ingester.ingestion_stats
|
116
|
+
|
117
|
+
Utility::Logger.debug("Sync stats are: #{stats}")
|
118
|
+
|
119
|
+
@status[:indexed_document_count] = stats[:indexed_document_count]
|
120
|
+
@status[:deleted_document_count] = stats[:deleted_document_count]
|
121
|
+
@status[:indexed_document_volume] = stats[:indexed_document_volume]
|
122
|
+
|
90
123
|
Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
|
91
124
|
Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
|
92
125
|
|
93
126
|
# Make sure to not override a previous error message
|
94
|
-
if !@sync_finished && @
|
95
|
-
@
|
127
|
+
if !@sync_finished && @sync_error.nil?
|
128
|
+
@sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
|
96
129
|
end
|
97
130
|
|
98
|
-
|
131
|
+
unless connector_instance.nil?
|
132
|
+
metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
|
133
|
+
metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
|
134
|
+
end
|
135
|
+
|
136
|
+
ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
|
99
137
|
|
100
|
-
if @
|
101
|
-
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@
|
138
|
+
if @sync_error
|
139
|
+
Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
|
102
140
|
else
|
103
141
|
Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
|
104
142
|
end
|
@@ -119,5 +157,15 @@ module Core
|
|
119
157
|
|
120
158
|
raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
|
121
159
|
end
|
160
|
+
|
161
|
+
def validate_filtering(filtering)
|
162
|
+
validation_result = @connector_class.validate_filtering(filtering)
|
163
|
+
|
164
|
+
wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
165
|
+
raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
|
166
|
+
|
167
|
+
errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
|
168
|
+
raise errors_present_error if validation_result[:errors].present?
|
169
|
+
end
|
122
170
|
end
|
123
171
|
end
|
data/lib/core.rb
CHANGED
@@ -7,8 +7,10 @@
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
9
|
require 'core/configuration'
|
10
|
+
require 'core/connector_job'
|
10
11
|
require 'core/connector_settings'
|
11
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering'
|
12
14
|
require 'core/heartbeat'
|
13
15
|
require 'core/scheduler'
|
14
16
|
require 'core/single_scheduler'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
module Utility
  # Accumulates Elasticsearch bulk-API operations — an action line plus an
  # optional payload line, newline-delimited — until either the operation
  # count or the byte-size threshold is reached.
  #
  # Fixes: removed the no-op `.freeze` calls on the Integer thresholds
  # (Integers are immutable; freezing them does nothing) and the dead
  # @current_data_size counter, which was written but never read.
  class BulkQueue
    # Raised when +add+ is called with data that does not fit; callers are
    # expected to check +will_fit?+ (and flush) first.
    class QueueOverflowError < StandardError; end

    # Defaults: 500 items or 5MB.
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      @buffer = ''

      @current_operation_count = 0
      @current_buffer_size = 0
    end

    # Returns the accumulated newline-delimited buffer and resets the queue.
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends +operation+ (and optional +payload+) to the buffer.
    # @raise [QueueOverflowError] when the data does not fit
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      @current_operation_count += 1
      # NOTE: the separator newlines are not counted towards the size
      # accounting, so thresholds are approximate by a few bytes per entry.
      @current_buffer_size += get_size(operation)
      @current_buffer_size += get_size(payload)

      @buffer << operation
      @buffer << "\n"

      if payload
        @buffer << payload
        @buffer << "\n"
      end
    end

    # True when +operation+/+payload+ can be added without exceeding either
    # the operation-count or the byte-size threshold.
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + get_size(operation) + get_size(payload) < @size_threshold
    end

    # Current fill level, for logging/metrics.
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of +str+, treating nil as empty.
    def get_size(str)
      return 0 unless str

      str.bytesize
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0

      @buffer = ''
    end
  end
end
|
data/lib/utility/constants.rb
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  class Filtering
    # Extracts the single filter object out of +filtering+, which may be a
    # filter hash itself or an array of filters. Returns an empty hash when
    # nothing usable is present.
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      # assume for now, that first object in filtering array or a filter object itself is the only filtering object
      filter = filtering.is_a?(Array) ? filtering.first : filtering

      filter.presence || {}
    end
  end
end
|
data/lib/utility/logger.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'config'
|
7
8
|
require 'logger'
|
8
9
|
require 'active_support/core_ext/module'
|
9
10
|
require 'active_support/core_ext/string/filters'
|
@@ -23,7 +24,7 @@ module Utility
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
28
|
end
|
28
29
|
|
29
30
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,15 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'utility/bulk_queue'
|
8
|
+
require 'utility/common'
|
7
9
|
require 'utility/constants'
|
8
10
|
require 'utility/cron'
|
9
|
-
require 'utility/
|
11
|
+
require 'utility/elasticsearch/index/mappings'
|
12
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
|
+
require 'utility/environment'
|
10
14
|
require 'utility/errors'
|
11
15
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
16
|
require 'utility/exception_tracking'
|
14
17
|
require 'utility/extension_mapping_util'
|
15
18
|
require 'utility/logger'
|
16
|
-
require 'utility/elasticsearch/index/mappings'
|
17
|
-
require 'utility/elasticsearch/index/text_analysis_settings'
|