connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -8
- data/lib/app/app.rb +4 -0
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +44 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +132 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +80 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +87 -0
- data/lib/utility/constants.rb +7 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +31 -12
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,141 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Filtering
    # A single "simple rule" from a connector's filtering configuration.
    #
    # A rule is built from a hash with the string keys 'id', 'policy',
    # 'field', 'rule' and 'value', and can be matched against a document
    # hash via #match?. The comparison value is coerced to the type of the
    # document's field value before comparing.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      # Allowed values for the 'policy' attribute.
      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      # Allowed values for the 'rule' (operator) attribute.
      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      attr_reader :policy, :field, :rule, :value, :id

      # rule_hash - Hash with string keys 'policy', 'field', 'rule',
      #             'value' and 'id'; all are required.
      # Raises RuntimeError ("<key> is required") when a key is missing.
      def initialize(rule_hash)
        @policy = rule_hash.fetch('policy')
        @field = rule_hash.fetch('field')
        @rule = rule_hash.fetch('rule')
        @value = rule_hash.fetch('value')
        @id = rule_hash.fetch('id')
        @rule_hash = rule_hash
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Convenience constructor from positional arguments.
      def self.from_args(id, policy, field, rule, value)
        new(
          'id' => id,
          'policy' => policy,
          'field' => field,
          'rule' => rule,
          'value' => value
        )
      end

      # Catch-all rule: matches every document.
      DEFAULT_RULE = SimpleRule.new(
        'policy' => 'include',
        'field' => '_',
        'rule' => 'regex',
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # Returns truthy when the document satisfies this rule.
      # The default rule always matches; a document without the rule's
      # field never matches.
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        actual = document[field]
        return false if actual.nil?

        expected = coerce(actual)

        case rule
        when Rule::EQUALS
          equals_match?(actual, expected)
        when Rule::STARTS_WITH
          actual.to_s.start_with?(value)
        when Rule::ENDS_WITH
          actual.to_s.end_with?(value)
        when Rule::CONTAINS
          actual.to_s.include?(value)
        when Rule::REGEX
          actual.to_s.match(/#{value}/)
        when Rule::LESS_THAN
          actual < expected
        when Rule::GREATER_THAN
          actual > expected
        else
          # unknown operator -> never matches
          false
        end
      end

      # Coerces the configured comparison value to the type of the
      # document's value. Falls back to String on any coercion error.
      def coerce(doc_value)
        case doc_value
        when String
          value.to_s
        when Integer
          value.to_i
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      # The raw hash this rule was built from.
      def to_h
        @rule_hash
      end

      private

      # Equality check that respects the coerced type: Integers compare
      # directly, dates/times compare by string form, everything else
      # compares as strings.
      def equals_match?(actual, expected)
        case expected
        when Integer
          actual == expected
        when DateTime, Time
          actual.to_s == expected.to_s
        else
          actual.to_s == expected
        end
      end

      # NOTE(review): relies on String#blank? (ActiveSupport) being available.
      def to_bool(str)
        return true if str == true || str =~ (/^(true|t|yes|y|on|1)$/i)
        return false if str == false || str.blank? || str =~ (/^(false|f|no|n|off|0)$/i)
        raise ArgumentError.new("invalid value for Boolean: \"#{str}\"")
      end

      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError
        Time.at(str.to_i) # try with it as an int string of millis
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    DEFAULT_DOMAIN = 'DEFAULT'

    # Runs a single filtering-validation job: asks the connector class to
    # validate the draft filter and writes the result (or a generic failure
    # result) back to the connectors index.
    class ValidationJobRunner
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Validates the connector's draft filtering and persists the outcome.
      # Any unexpected error is logged and reported as an INVALID result.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        result = @connector_class.validate_filtering(@connector_settings.filtering[:draft])

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => result })

        @validation_finished = true
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => generic_failure_result })
      ensure
        # If we never reached the end of the happy path, record an error so
        # the final log line below reports the failure.
        @status[:error] ||= 'Validation thread did not finish execution. Check connector logs for more details.' unless @validation_finished

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end

      private

      # Result payload used when validation blew up with an unexpected error.
      def generic_failure_result
        {
          :state => Core::Filtering::ValidationStatus::INVALID,
          :errors => [
            { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
          ]
        }
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Filtering
    # String states stored in the draft-filtering validation object of a
    # connector document.
    class ValidationStatus
      INVALID = 'invalid'
      VALID = 'valid'
      # Draft was changed by the user and awaits (re-)validation.
      EDITED = 'edited'
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering/post_process_engine'
|
10
|
+
require 'core/filtering/post_process_result'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
require 'core/filtering/validation_job_runner'
|
13
|
+
require 'core/filtering/validation_status'
|
14
|
+
|
15
|
+
module Core
  module Filtering
    # Domain used when a validation result is not scoped to a specific
    # domain; connectors currently only use this default domain.
    DEFAULT_DOMAIN = 'DEFAULT'
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'app/config'
|
10
|
+
require 'utility/bulk_queue'
|
11
|
+
require 'utility/es_client'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'elasticsearch/api'
|
14
|
+
#
|
15
|
+
# This class is responsible for sending the data to the data storage.
|
16
|
+
# While we don't actually allow to output our data anywhere except
|
17
|
+
# Elasticsearch, we still want to be able to do so sometime in future.
|
18
|
+
#
|
19
|
+
# This class should stay simple and any change to the class should be careful
|
20
|
+
# with the thought of introducing other sinks in future.
|
21
|
+
module Core
  module Ingestion
    # Elasticsearch sink: buffers index/delete operations in a bulk queue
    # and flushes them to the ES bulk API, keeping ingestion statistics
    # for successfully flushed operations.
    class EsSink
      # index_name                - destination index for all operations
      # request_pipeline          - ingest pipeline passed to each bulk call
      # bulk_queue                - queue batching serialized operations
      # max_allowed_document_size - documents larger than this (in bytes)
      #                             are skipped; <= 0 disables the check
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
        @max_allowed_document_size = max_allowed_document_size

        # Counters for queued-but-not-yet-flushed vs successfully flushed ops.
        @queued = empty_stats
        @completed = empty_stats
      end

      # Queues one document for indexing. Empty/nil documents and documents
      # exceeding the size limit are skipped with a warning.
      def ingest(document)
        if document.nil? || document.empty?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        doc_id = document['id']
        payload = serialize(document)
        payload_size = payload.bytesize

        if @max_allowed_document_size > 0 && payload_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{payload_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        header = serialize({ 'index' => { '_index' => @index_name, '_id' => doc_id } })

        # Flush first when this operation would overflow the bulk queue.
        flush unless @operation_queue.will_fit?(header, payload)

        @operation_queue.add(header, payload)
        @queued[:indexed_document_count] += 1
        @queued[:indexed_document_volume] += payload_size
      end

      def ingest_multiple(documents)
        documents.each { |document| ingest(document) }
      end

      # Queues a delete operation for the given document id (no-op for nil).
      def delete(id)
        return if id.nil?

        header = serialize({ 'delete' => { '_index' => @index_name, '_id' => id } })
        flush unless @operation_queue.will_fit?(header)

        @operation_queue.add(header)
        @queued[:deleted_document_count] += 1
      end

      def delete_multiple(ids)
        ids.each { |id| delete(id) }
      end

      # Sends everything currently queued to the bulk API, then rolls the
      # queued counters into the completed totals. No-op on an empty queue.
      # If the bulk call raises, the queued counters are intentionally not
      # counted as completed.
      def flush
        payload = @operation_queue.pop_all
        return if payload.empty?

        @client.bulk(:body => payload, :pipeline => @request_pipeline)

        %i[indexed_document_count deleted_document_count indexed_document_volume].each do |key|
          @completed[key] += @queued[key]
          @queued[key] = 0
        end
      end

      # Snapshot of totals for operations that were successfully flushed.
      def ingestion_stats
        @completed.dup
      end

      private

      def empty_stats
        {
          :indexed_document_count => 0,
          :deleted_document_count => 0,
          :indexed_document_volume => 0
        }
      end

      def serialize(document)
        Elasticsearch::API.serializer.dump(document)
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/constants'
|
10
|
+
|
11
|
+
module Core
  module Jobs
    # Polls for pending sync jobs belonging to ready connectors and executes
    # them on a bounded thread pool. One consumer serves one index.
    class Consumer
      def initialize(scheduler:,
                     max_ingestion_queue_size:,
                     max_ingestion_queue_bytes:,
                     poll_interval: 3,
                     termination_timeout: 60,
                     min_threads: 1,
                     max_threads: 5,
                     max_queue: 100,
                     idle_time: 5)
        @scheduler = scheduler
        @poll_interval = poll_interval
        @termination_timeout = termination_timeout
        @min_threads = min_threads
        @max_threads = max_threads
        @max_queue = max_queue
        @idle_time = idle_time

        @max_ingestion_queue_size = max_ingestion_queue_size
        @max_ingestion_queue_bytes = max_ingestion_queue_bytes

        @running = Concurrent::AtomicBoolean.new(false)
      end

      # Starts the polling loop for the given index.
      def subscribe!(index_name:)
        @index_name = index_name

        start_loop!
      end

      def running?
        # @TODO check if a loop thread is alive
        pool.running? && @running.true?
      end

      # Stops the polling loop and shuts the pool down, waiting up to
      # termination_timeout seconds for in-flight jobs.
      def shutdown!
        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
        @running.make_false
        pool.shutdown
        pool.wait_for_termination(@termination_timeout)
        # Drop the pool so a subsequent subscribe! builds a fresh executor.
        @pool = nil
      end

      private

      def start_loop!
        Utility::Logger.info("Starting a new consumer for #{@index_name} index")

        Thread.new do
          # assign a name to the thread
          # see @TODO in #self.running?
          # NOTE(review): this sets a thread-local variable, not Thread#name.
          Thread.current[:name] = "consumer-group-#{@index_name}"

          loop do
            unless @running.true?
              Utility::Logger.info('Shutting down the loop')
              break
            end

            sleep(@poll_interval)
            Utility::Logger.debug('Getting registered connectors')

            connectors = ready_for_sync_connectors
            next if connectors.empty?

            Utility::Logger.debug("Number of available connectors: #{connectors.size}")

            process_pending_jobs(connectors)
          rescue StandardError => e
            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
          end
        end

        @running.make_true
      end

      # Fetches pending jobs for the given connectors and posts one task per
      # job onto the pool.
      def process_pending_jobs(connectors)
        # @TODO It is assumed that @index_name is used to retrieve pending jobs.
        # This will be discussed after 8.6 release
        pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
        Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")

        pending_jobs.each do |job|
          connector_settings = connectors[job.connector_id]
          pool.post { execute_job(job, connector_settings) }
        end
      end

      # Runs a single sync job; expected job-lifecycle errors are logged and
      # swallowed so they don't kill the pool thread.
      def execute_job(job, connector_settings)
        Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
        Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
        Core::SyncJobRunner.new(
          connector_settings,
          job,
          @max_ingestion_queue_size,
          @max_ingestion_queue_bytes
        ).execute
      rescue Core::JobAlreadyRunningError
        Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
      rescue Core::ConnectorVersionChangedError => e
        Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
      end

      def pool
        @pool ||= Concurrent::ThreadPoolExecutor.new(
          min_threads: @min_threads,
          max_threads: @max_threads,
          max_queue: @max_queue,
          fallback_policy: :abort,
          idletime: @idle_time
        )
      end

      # Connectors ready to sync, keyed by connector id.
      def ready_for_sync_connectors
        @scheduler.connector_settings
                  .select(&:ready_for_sync?)
                  .each_with_object({}) { |cs, memo| memo[cs.id] = cs }
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Jobs
    # Raised when enqueue_job receives a job type outside Producer::JOB_TYPES.
    class UnsupportedJobType < StandardError; end

    # Creates job documents for connectors.
    class Producer
      JOB_TYPES = %i(sync).freeze

      # Enqueues a job of the given type for the given connector.
      #
      # job_type           - one of JOB_TYPES (currently only :sync)
      # connector_settings - a Core::ConnectorSettings instance
      #
      # Raises UnsupportedJobType / ArgumentError on invalid input.
      def self.enqueue_job(job_type:, connector_settings:)
        raise UnsupportedJobType unless JOB_TYPES.include?(job_type)
        raise ArgumentError unless connector_settings.kind_of?(ConnectorSettings)

        ElasticConnectorActions.create_job(connector_settings: connector_settings)
      end
    end
  end
end
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
# A configuration round-trip is needed when the connector has not reported
# a service type yet, or is still in the CREATED state.
def configuration_triggered?(connector_settings)
  return true if connector_settings.needs_service_type?

  connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
149
157
|
end
|
150
158
|
|
151
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
152
182
|
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
@@ -20,7 +20,7 @@ module Core
|
|
20
20
|
|
21
21
|
def connector_settings
|
22
22
|
connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
|
23
|
-
[connector_settings]
|
23
|
+
[connector_settings].compact
|
24
24
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
25
25
|
# should be handled by the general scheduler
|
26
26
|
raise e
|