connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -8
- data/lib/app/app.rb +4 -0
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +44 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +132 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +80 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +87 -0
- data/lib/utility/constants.rb +7 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +31 -12
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,141 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Filtering
    # A single "simple rule" from a connector's filtering configuration.
    # Decides whether a document is matched based on a policy
    # (include/exclude), a field name, a comparison rule and a value.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      # Allowed values for a rule's policy.
      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      # Allowed comparison operators.
      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      attr_reader :policy, :field, :rule, :value, :id

      # rule_hash must contain the string keys 'policy', 'field', 'rule',
      # 'value' and 'id'; a missing key raises a RuntimeError.
      def initialize(rule_hash)
        @rule_hash = rule_hash
        @policy, @field, @rule, @value, @id =
          %w[policy field rule value id].map { |key| rule_hash.fetch(key) }
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Convenience constructor from positional arguments.
      def self.from_args(id, policy, field, rule, value)
        SimpleRule.new(
          'id' => id,
          'policy' => policy,
          'field' => field,
          'rule' => rule,
          'value' => value
        )
      end

      # Catch-all rule that matches every document.
      DEFAULT_RULE = SimpleRule.new(
        'policy' => 'include',
        'field' => '_',
        'rule' => 'regex',
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # True when the document satisfies this rule. The default rule matches
      # unconditionally; a document without the rule's field never matches.
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        actual = document[field]
        return false if actual.nil?

        expected = coerce(actual)

        case rule
        when Rule::EQUALS
          equal_values?(actual, expected)
        when Rule::STARTS_WITH
          actual.to_s.start_with?(value)
        when Rule::ENDS_WITH
          actual.to_s.end_with?(value)
        when Rule::CONTAINS
          actual.to_s.include?(value)
        when Rule::REGEX
          actual.to_s.match(/#{value}/)
        when Rule::LESS_THAN
          actual < expected
        when Rule::GREATER_THAN
          actual > expected
        else
          false
        end
      end

      # Convert the configured value to the same type as the document value,
      # falling back to a string comparison when coercion fails.
      def coerce(doc_value)
        case doc_value
        when Integer
          value.to_i
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      def to_h
        @rule_hash
      end

      private

      # Equality check used by Rule::EQUALS, sensitive to the coerced type.
      def equal_values?(actual, expected)
        case expected
        when Integer
          actual == expected
        when DateTime, Time
          actual.to_s == expected.to_s
        else
          actual.to_s == expected
        end
      end

      def to_bool(str)
        return true if str == true || str =~ (/^(true|t|yes|y|on|1)$/i)
        return false if str == false || str.blank? || str =~ (/^(false|f|no|n|off|0)$/i)
        raise ArgumentError.new("invalid value for Boolean: \"#{str}\"")
      end

      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError
        Time.at(str.to_i) # try with it as an int string of millis
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    DEFAULT_DOMAIN = 'DEFAULT'

    # Runs a single filtering-validation job: asks the connector class to
    # validate the connector's draft filter and writes the validation result
    # back to the connectors index via ElasticConnectorActions.
    class ValidationJobRunner
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering[:draft])

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e)
        # Persist a generic "invalid" verdict when validation blows up.
        failure = {
          :state => Core::Filtering::ValidationStatus::INVALID,
          :errors => [
            { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
          ]
        }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => failure })
      ensure
        # Record a fallback error when the job neither finished nor reported one.
        unless @validation_finished || @status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Filtering
    # Lifecycle states a filtering draft's validation can be in.
    class ValidationStatus
      VALID = 'valid'       # draft passed validation
      INVALID = 'invalid'   # draft failed validation
      EDITED = 'edited'     # draft changed and awaits (re-)validation
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering/post_process_engine'
|
10
|
+
require 'core/filtering/post_process_result'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
require 'core/filtering/validation_job_runner'
|
13
|
+
require 'core/filtering/validation_status'
|
14
|
+
|
15
|
+
module Core
  module Filtering
    # Domain used when a connector does not distinguish multiple domains.
    DEFAULT_DOMAIN = 'DEFAULT'
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'app/config'
|
10
|
+
require 'utility/bulk_queue'
|
11
|
+
require 'utility/es_client'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'elasticsearch/api'
|
14
|
+
#
|
15
|
+
# This class is responsible for sending the data to the data storage.
|
16
|
+
# While we don't actually allow to output our data anywhere except
|
17
|
+
# Elasticsearch, we still want to be able to do so sometime in future.
|
18
|
+
#
|
19
|
+
# This class should stay simple and any change to the class should be careful
|
20
|
+
# with the thought of introducing other sinks in future.
|
21
|
+
module Core
  module Ingestion
    # Buffers index/delete operations in a bulk queue and flushes them to
    # Elasticsearch via the bulk API. Tracks per-flush and lifetime counts
    # of indexed/deleted documents and ingested volume.
    class EsSink
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
        @max_allowed_document_size = max_allowed_document_size

        # Stats for operations queued but not yet flushed, and for
        # operations already sent to Elasticsearch.
        @queued = empty_stats
        @completed = empty_stats
      end

      # Queue a document for indexing. Empty documents and documents larger
      # than the configured byte limit are skipped with a warning.
      def ingest(document)
        if document.nil? || document.empty?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        doc_id = document['id']
        payload = serialize(document)
        payload_size = payload.bytesize

        if @max_allowed_document_size > 0 && payload_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{payload_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        index_op = serialize({ 'index' => { '_index' => @index_name, '_id' => doc_id } })

        # Flush first when the pair of operations would overflow the queue.
        flush unless @operation_queue.will_fit?(index_op, payload)

        @operation_queue.add(index_op, payload)

        @queued[:indexed_document_count] += 1
        @queued[:indexed_document_volume] += payload_size
      end

      def ingest_multiple(documents)
        documents.each { |doc| ingest(doc) }
      end

      # Queue a delete operation; a nil id is silently ignored.
      def delete(id)
        return if id.nil?

        delete_op = serialize({ 'delete' => { '_index' => @index_name, '_id' => id } })
        flush unless @operation_queue.will_fit?(delete_op)

        @operation_queue.add(delete_op)
        @queued[:deleted_document_count] += 1
      end

      def delete_multiple(ids)
        ids.each { |id| delete(id) }
      end

      # Send everything queued so far in one bulk request and move the
      # queued stats into the completed totals.
      def flush
        batch = @operation_queue.pop_all
        return if batch.empty?

        @client.bulk(:body => batch, :pipeline => @request_pipeline)

        @queued.each_key do |metric|
          @completed[metric] += @queued[metric]
          @queued[metric] = 0
        end
      end

      # Lifetime stats of successfully flushed work (a defensive copy).
      def ingestion_stats
        @completed.dup
      end

      private

      # Fresh zeroed stats hash (one per use — never shared).
      def empty_stats
        {
          :indexed_document_count => 0,
          :deleted_document_count => 0,
          :indexed_document_volume => 0
        }
      end

      def serialize(document)
        Elasticsearch::API.serializer.dump(document)
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/constants'
|
10
|
+
|
11
|
+
module Core
  module Jobs
    # Polls the connectors known to the scheduler for pending sync jobs and
    # executes each job on a bounded thread pool. One consumer serves one
    # jobs index (set via #subscribe!).
    class Consumer
      # scheduler: object exposing #connector_settings (list of ConnectorSettings).
      # max_ingestion_queue_size / max_ingestion_queue_bytes: passed through to
      #   each SyncJobRunner.
      # poll_interval: seconds between polls; termination_timeout: seconds to
      #   wait for the pool on shutdown; remaining args configure the pool.
      def initialize(scheduler:,
                     max_ingestion_queue_size:,
                     max_ingestion_queue_bytes:,
                     poll_interval: 3,
                     termination_timeout: 60,
                     min_threads: 1,
                     max_threads: 5,
                     max_queue: 100,
                     idle_time: 5)
        @scheduler = scheduler
        @poll_interval = poll_interval
        @termination_timeout = termination_timeout
        @min_threads = min_threads
        @max_threads = max_threads
        @max_queue = max_queue
        @idle_time = idle_time

        @max_ingestion_queue_size = max_ingestion_queue_size
        @max_ingestion_queue_bytes = max_ingestion_queue_bytes

        @running = Concurrent::AtomicBoolean.new(false)
      end

      # Start consuming jobs for the given index. Returns immediately; work
      # happens on a background thread.
      def subscribe!(index_name:)
        @index_name = index_name

        start_loop!
      end

      def running?
        # @TODO check if a loop thread is alive
        pool.running? && @running.true?
      end

      # Stop the polling loop and drain the thread pool (bounded by
      # @termination_timeout), then drop the pool so it can be rebuilt.
      def shutdown!
        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
        @running.make_false
        pool.shutdown
        pool.wait_for_termination(@termination_timeout)
        # reset pool
        @pool = nil
      end

      private

      def start_loop!
        Utility::Logger.info("Starting a new consumer for #{@index_name} index")

        # BUGFIX: the flag must be flipped *before* the thread is spawned.
        # Previously make_true ran after Thread.new, so the loop thread could
        # observe @running.false? on its first iteration and exit immediately.
        @running.make_true

        Thread.new do
          # assign a name to the thread
          # see @TODO in #self.running?
          Thread.current[:name] = "consumer-group-#{@index_name}"

          loop do
            if @running.false?
              Utility::Logger.info('Shutting down the loop')
              break
            end

            sleep(@poll_interval)
            Utility::Logger.debug('Getting registered connectors')

            connectors = ready_for_sync_connectors
            next unless connectors.any?

            Utility::Logger.debug("Number of available connectors: #{connectors.size}")

            # @TODO It is assumed that @index_name is used to retrieve pending jobs.
            # This will be discussed after 8.6 release
            pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
            Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")

            pending_jobs.each do |job|
              connector_settings = connectors[job.connector_id]

              pool.post do
                Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
                Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
                job_runner = Core::SyncJobRunner.new(
                  connector_settings,
                  job,
                  @max_ingestion_queue_size,
                  @max_ingestion_queue_bytes
                )
                job_runner.execute
              rescue Core::JobAlreadyRunningError
                Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
              rescue Core::ConnectorVersionChangedError => e
                Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
              rescue StandardError => e
                Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
              end
            end
          rescue StandardError => e
            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
          end
        end
      end

      # Lazily built worker pool; :abort makes #post raise when the queue is full.
      def pool
        @pool ||= Concurrent::ThreadPoolExecutor.new(
          min_threads: @min_threads,
          max_threads: @max_threads,
          max_queue: @max_queue,
          fallback_policy: :abort,
          idletime: @idle_time
        )
      end

      # Map of connector id => settings for connectors that are ready to sync.
      def ready_for_sync_connectors
        @scheduler.connector_settings
                  .select(&:ready_for_sync?)
                  .inject({}) { |memo, cs| memo.merge(cs.id => cs) }
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module Jobs
    # Enqueues job documents for connectors. Only :sync jobs exist today.
    class Producer
      JOB_TYPES = %i(sync).freeze

      # Validates the job type and the settings object, then persists a new
      # job document. Raises UnsupportedJobType / ArgumentError on bad input.
      def self.enqueue_job(job_type:, connector_settings:)
        raise UnsupportedJobType unless JOB_TYPES.include?(job_type)
        raise ArgumentError unless connector_settings.kind_of?(ConnectorSettings)

        ElasticConnectorActions.create_job(connector_settings: connector_settings)
      end
    end

    class UnsupportedJobType < StandardError; end
  end
end
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
def configuration_triggered?(connector_settings)
|
147
|
-
|
148
|
-
|
147
|
+
connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
|
148
|
+
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
149
157
|
end
|
150
158
|
|
151
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
152
182
|
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
@@ -20,7 +20,7 @@ module Core
|
|
20
20
|
|
21
21
|
def connector_settings
|
22
22
|
connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
|
23
|
-
[connector_settings]
|
23
|
+
[connector_settings].compact
|
24
24
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
25
25
|
# should be handled by the general scheduler
|
26
26
|
raise e
|