connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -8
- data/lib/app/config.rb +6 -1
- data/lib/app/console_app.rb +1 -1
- data/lib/app/dispatcher.rb +18 -3
- data/lib/connectors/base/connector.rb +39 -22
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/connectors/example/connector.rb +2 -2
- data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
- data/lib/connectors/gitlab/connector.rb +4 -4
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
- data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
- data/lib/connectors/mongodb/connector.rb +66 -56
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
- data/lib/connectors/registry.rb +1 -1
- data/lib/connectors/tolerable_error_helper.rb +5 -1
- data/lib/connectors_utility.rb +6 -3
- data/lib/core/configuration.rb +13 -1
- data/lib/core/connector_job.rb +48 -7
- data/lib/core/connector_settings.rb +52 -20
- data/lib/core/elastic_connector_actions.rb +54 -38
- data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
- data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
- data/lib/core/filtering/filter_validator.rb +103 -0
- data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
- data/lib/core/filtering/post_process_engine.rb +2 -2
- data/lib/core/filtering/processing_stage.rb +20 -0
- data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
- data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
- data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
- data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
- data/lib/core/filtering/transform/filter_transformer.rb +26 -0
- data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
- data/lib/core/filtering/transform/transformation_target.rb +10 -0
- data/lib/core/filtering/validation_job_runner.rb +1 -3
- data/lib/core/filtering.rb +5 -3
- data/lib/core/job_cleanup.rb +66 -0
- data/lib/core/jobs/consumer.rb +62 -64
- data/lib/core/jobs/producer.rb +3 -0
- data/lib/core/scheduler.rb +67 -52
- data/lib/core/sync_job_runner.rb +170 -83
- data/lib/core.rb +1 -0
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/error_monitor.rb +26 -5
- data/lib/utility/es_client.rb +4 -0
- data/lib/utility/filtering.rb +4 -0
- metadata +32 -21
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
@@ -0,0 +1,66 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core
  # Housekeeping for connector sync jobs. Two passes are performed:
  #
  # 1. Orphaned jobs (as reported by ConnectorJob.orphaned_jobs for the ids of
  #    all known connectors) are deleted, together with any content index that
  #    is referenced only by those jobs.
  # 2. Idle jobs (as reported by ConnectorJob.idle_jobs) are marked as errored
  #    and the owning connector's last-sync information is refreshed.
  class JobCleanUp
    class << self
      # Runs both clean-up passes, orphaned jobs first.
      #
      # @param connector_id [Object, nil] forwarded unchanged to
      #   ConnectorJob.idle_jobs; when nil the idle-job pass is logged as
      #   targeting 'native connectors'.
      def execute(connector_id = nil)
        process_orphaned_jobs
        process_idle_jobs(connector_id)
      end

      private

      # Deletes jobs reported as orphaned, plus content indices that no
      # existing connector uses.
      def process_orphaned_jobs
        Utility::Logger.debug('Start cleaning up orphaned jobs...')
        all_connectors = ConnectorSettings.fetch_all_connectors
        orphaned_jobs = ConnectorJob.orphaned_jobs(all_connectors.map(&:id))
        if orphaned_jobs.empty?
          Utility::Logger.debug('No orphaned jobs found. Skipping...')
          return
        end

        # delete content indices in case they are re-created by sync job;
        # indices still used by an existing connector are excluded
        content_indices = (orphaned_jobs.map(&:index_name) - all_connectors.map(&:index_name)).compact.uniq
        ElasticConnectorActions.delete_indices(content_indices) if content_indices.any?
        result = ConnectorJob.delete_jobs(orphaned_jobs)
        Utility::Logger.error("Error found when deleting jobs: #{result['failures']}") if result['failures']&.any?
        Utility::Logger.info("Successfully deleted #{result['deleted']} out of #{result['total']} orphaned jobs.")
      end

      # Marks every idle job as errored and reports how many were processed
      # without raising.
      def process_idle_jobs(connector_id = nil)
        Utility::Logger.debug("Start cleaning up idle jobs for #{connector_id ? "connector #{connector_id}" : 'native connectors'}...")
        idle_jobs = ConnectorJob.idle_jobs(connector_id)
        if idle_jobs.empty?
          Utility::Logger.debug('No idle jobs found. Skipping...')
          return
        end

        marked_count = idle_jobs.count { |idle_job| mark_idle_job_as_error(idle_job) }
        Utility::Logger.info("Successfully marked #{marked_count} out of #{idle_jobs.count} idle jobs as error.")
      end

      # Marks a single idle job as errored, then re-fetches it so the connector's
      # last-sync information is updated from the freshly stored job.
      #
      # A job or connector that can no longer be found is only warned about and
      # still counts as processed. Any exception is logged together with the
      # job id and makes the method return false.
      #
      # @return [Boolean] true when the job was processed without raising
      def mark_idle_job_as_error(idle_job)
        job_id = idle_job.id
        idle_job.error!('The job has not seen any update for some time.')
        Utility::Logger.debug("Successfully marked job #{job_id} as error.")

        refreshed_job = ConnectorJob.fetch_by_id(job_id)
        if refreshed_job.nil?
          Utility::Logger.warn("Could not find job by id #{job_id}")
          return true
        end

        # look the connector up once and reuse it for the check and the update
        connector = refreshed_job.connector
        if connector.nil?
          Utility::Logger.warn("Could not find connector by id #{refreshed_job.connector_id}")
        else
          connector.update_last_sync!(refreshed_job)
        end
        true
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Failed to clean up idle job #{job_id}.")
        false
      end
    end
  end
end
|
data/lib/core/jobs/consumer.rb
CHANGED
@@ -6,7 +6,11 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'utility/logger'
|
9
10
|
require 'utility/constants'
|
11
|
+
require 'core/connector_job'
|
12
|
+
require 'core/sync_job_runner'
|
13
|
+
require 'concurrent'
|
10
14
|
|
11
15
|
module Core
|
12
16
|
module Jobs
|
@@ -30,90 +34,39 @@ module Core
|
|
30
34
|
|
31
35
|
@max_ingestion_queue_size = max_ingestion_queue_size
|
32
36
|
@max_ingestion_queue_bytes = max_ingestion_queue_bytes
|
33
|
-
|
34
|
-
@running = Concurrent::AtomicBoolean.new(false)
|
35
37
|
end
|
36
38
|
|
37
39
|
def subscribe!(index_name:)
|
38
|
-
|
40
|
+
Utility::Logger.info("Starting a new consumer for #{index_name} index")
|
39
41
|
|
40
|
-
|
42
|
+
@index_name = index_name
|
43
|
+
start_timer_task!
|
44
|
+
start_thread_pool!
|
41
45
|
end
|
42
46
|
|
43
47
|
def running?
|
44
|
-
|
45
|
-
pool.running? && @running.true?
|
48
|
+
pool&.running? && timer_task&.running?
|
46
49
|
end
|
47
50
|
|
48
51
|
def shutdown!
|
49
52
|
Utility::Logger.info("Shutting down consumer for #{@index_name} index")
|
50
|
-
|
53
|
+
|
54
|
+
timer_task.shutdown
|
51
55
|
pool.shutdown
|
52
56
|
pool.wait_for_termination(@termination_timeout)
|
53
|
-
|
54
|
-
@pool = nil
|
57
|
+
reset_pool!
|
55
58
|
end
|
56
59
|
|
57
60
|
private
|
58
61
|
|
59
|
-
|
60
|
-
Utility::Logger.info("Starting a new consumer for #{@index_name} index")
|
61
|
-
|
62
|
-
Thread.new do
|
63
|
-
# assign a name to the thread
|
64
|
-
# see @TODO in #self.running?
|
65
|
-
Thread.current[:name] = "consumer-group-#{@index_name}"
|
66
|
-
|
67
|
-
loop do
|
68
|
-
if @running.false?
|
69
|
-
Utility::Logger.info('Shutting down the loop')
|
70
|
-
break
|
71
|
-
end
|
72
|
-
|
73
|
-
sleep(@poll_interval)
|
74
|
-
Utility::Logger.debug('Getting registered connectors')
|
75
|
-
|
76
|
-
connectors = ready_for_sync_connectors
|
77
|
-
next unless connectors.any?
|
78
|
-
|
79
|
-
Utility::Logger.debug("Number of available connectors: #{connectors.size}")
|
80
|
-
|
81
|
-
# @TODO It is assumed that @index_name is used to retrive pending jobs.
|
82
|
-
# This will be discussed after 8.6 release
|
83
|
-
pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
|
84
|
-
Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
|
85
|
-
|
86
|
-
pending_jobs.each do |job|
|
87
|
-
connector_settings = connectors[job.connector_id]
|
88
|
-
|
89
|
-
pool.post do
|
90
|
-
Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
|
91
|
-
Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
|
92
|
-
job_runner = Core::SyncJobRunner.new(
|
93
|
-
connector_settings,
|
94
|
-
job,
|
95
|
-
@max_ingestion_queue_size,
|
96
|
-
@max_ingestion_queue_bytes
|
97
|
-
)
|
98
|
-
job_runner.execute
|
99
|
-
rescue Core::JobAlreadyRunningError
|
100
|
-
Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
|
101
|
-
rescue Core::ConnectorVersionChangedError => e
|
102
|
-
Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
|
103
|
-
rescue StandardError => e
|
104
|
-
Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
|
105
|
-
end
|
106
|
-
end
|
107
|
-
rescue StandardError => e
|
108
|
-
Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
|
109
|
-
end
|
110
|
-
end
|
62
|
+
attr_reader :pool, :timer_task
|
111
63
|
|
112
|
-
|
64
|
+
# Creates and starts the timer task that invokes #execute every
# @poll_interval seconds; run_now: true requests an immediate first run.
def start_timer_task!
  @timer_task = Concurrent::TimerTask.execute(execution_interval: @poll_interval, run_now: true) do
    execute
  end
end
|
114
67
|
|
115
|
-
def
|
116
|
-
@pool
|
68
|
+
def start_thread_pool!
|
69
|
+
@pool = Concurrent::ThreadPoolExecutor.new(
|
117
70
|
min_threads: @min_threads,
|
118
71
|
max_threads: @max_threads,
|
119
72
|
max_queue: @max_queue,
|
@@ -122,6 +75,51 @@ module Core
|
|
122
75
|
)
|
123
76
|
end
|
124
77
|
|
78
|
+
# Drops the reference to the current thread pool by setting @pool to nil.
def reset_pool!
|
79
|
+
@pool = nil
|
80
|
+
end
|
81
|
+
|
82
|
+
# One polling pass: looks up the connectors that are ready to sync, fetches
# their pending jobs and hands each job to #execute_job together with the
# settings of its connector. Any StandardError is logged, not re-raised.
def execute
  Utility::Logger.debug('Getting registered connectors')

  syncable_connectors = ready_for_sync_connectors
  if syncable_connectors.any?
    Utility::Logger.debug("Number of available connectors: #{syncable_connectors.size}")

    # @TODO It is assumed that @index_name is used to retrieve pending jobs.
    # This will be discussed after 8.6 release
    jobs = Core::ConnectorJob.pending_jobs(connectors_ids: syncable_connectors.keys)
    Utility::Logger.info("Number of pending jobs: #{jobs.size}")

    jobs.each { |job| execute_job(job, syncable_connectors[job.connector_id]) }
  end
rescue StandardError => e
  Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
end
|
102
|
+
|
103
|
+
# Posts a task to the thread pool that makes sure the connector's content
# index exists and then runs the sync job through Core::SyncJobRunner.
# Errors raised inside the task are logged and never propagate out of it.
#
# @param job the pending job to run
# @param connector_settings settings of the connector that owns the job
def execute_job(job, connector_settings)
  pool.post do
    begin
      Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
      Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
      Core::SyncJobRunner
        .new(connector_settings, job, @max_ingestion_queue_size, @max_ingestion_queue_bytes)
        .execute
    rescue Core::JobAlreadyRunningError
      Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
    rescue Core::ConnectorVersionChangedError => e
      Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
    end
  end
end
|
122
|
+
|
125
123
|
def ready_for_sync_connectors
|
126
124
|
@scheduler.connector_settings
|
127
125
|
.select(&:ready_for_sync?)
|
data/lib/core/jobs/producer.rb
CHANGED
data/lib/core/scheduler.rb
CHANGED
@@ -44,17 +44,14 @@ module Core
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
47
|
-
|
47
|
+
log_authorization_error(e)
|
48
48
|
rescue StandardError => e
|
49
|
-
|
49
|
+
log_standard_error(e)
|
50
50
|
ensure
|
51
51
|
if @is_shutting_down
|
52
52
|
break
|
53
53
|
end
|
54
|
-
|
55
|
-
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
56
|
-
sleep(@poll_interval)
|
57
|
-
end
|
54
|
+
sleep_for_poll_interval
|
58
55
|
end
|
59
56
|
end
|
60
57
|
|
@@ -78,56 +75,12 @@ module Core
|
|
78
75
|
end
|
79
76
|
|
80
77
|
# Sync when sync_now flag is true for the connector
|
81
|
-
if connector_settings
|
78
|
+
if connector_settings.sync_now?
|
82
79
|
Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
|
83
80
|
return true
|
84
81
|
end
|
85
82
|
|
86
|
-
|
87
|
-
scheduling_settings = connector_settings.scheduling_settings
|
88
|
-
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
|
89
|
-
Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
|
90
|
-
return false
|
91
|
-
end
|
92
|
-
|
93
|
-
# We want to sync when sync never actually happened
|
94
|
-
last_synced = connector_settings[:last_synced]
|
95
|
-
if last_synced.nil? || last_synced.empty?
|
96
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
97
|
-
return true
|
98
|
-
end
|
99
|
-
|
100
|
-
current_schedule = scheduling_settings[:interval]
|
101
|
-
|
102
|
-
# Don't sync if there is no actual scheduling interval
|
103
|
-
if current_schedule.nil? || current_schedule.empty?
|
104
|
-
Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
|
105
|
-
return false
|
106
|
-
end
|
107
|
-
|
108
|
-
current_schedule = begin
|
109
|
-
Utility::Cron.quartz_to_crontab(current_schedule)
|
110
|
-
rescue StandardError => e
|
111
|
-
Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
|
112
|
-
return false
|
113
|
-
end
|
114
|
-
cron_parser = Fugit::Cron.parse(current_schedule)
|
115
|
-
|
116
|
-
# Don't sync if the scheduling interval is non-parsable
|
117
|
-
unless cron_parser
|
118
|
-
Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
|
119
|
-
return false
|
120
|
-
end
|
121
|
-
|
122
|
-
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
|
-
|
124
|
-
# Sync if next trigger for the connector is in past
|
125
|
-
if next_trigger_time < Time.now
|
126
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
|
127
|
-
return true
|
128
|
-
end
|
129
|
-
|
130
|
-
false
|
83
|
+
schedule_triggered?(connector_settings.full_sync_scheduling, connector_settings.formatted)
|
131
84
|
end
|
132
85
|
|
133
86
|
def heartbeat_triggered?(connector_settings)
|
@@ -148,6 +101,12 @@ module Core
|
|
148
101
|
end
|
149
102
|
|
150
103
|
def filtering_validation_triggered?(connector_settings)
|
104
|
+
unless connector_settings.any_filtering_feature_enabled?
|
105
|
+
Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
|
106
|
+
|
107
|
+
return false
|
108
|
+
end
|
109
|
+
|
151
110
|
filtering = connector_settings.filtering
|
152
111
|
|
153
112
|
unless filtering.present?
|
@@ -189,5 +148,61 @@ module Core
|
|
189
148
|
false
|
190
149
|
end
|
191
150
|
end
|
151
|
+
|
152
|
+
# Decides whether a cron-based schedule is due.
#
# Returns false when scheduling is not explicitly enabled, when no interval is
# configured, or when the interval cannot be converted to crontab or parsed.
# Otherwise returns true if the next cron occurrence after the current time
# falls at or before the current time plus @poll_interval.
#
# @param scheduling_settings hash-like object read via [:enabled] and [:interval]
# @param identifier [String] label of the connector, used in log messages only
# @return [Boolean]
def schedule_triggered?(scheduling_settings, identifier)
  # Don't sync if sync is explicitly disabled
  scheduling_enabled = scheduling_settings.present? && scheduling_settings[:enabled] == true
  unless scheduling_enabled
    Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
    return false
  end

  # Don't sync if there is no actual scheduling interval
  quartz_expression = scheduling_settings[:interval]
  if quartz_expression.nil? || quartz_expression.empty?
    Utility::Logger.warn("No sync schedule configured for #{identifier}.")
    return false
  end

  begin
    crontab_expression = Utility::Cron.quartz_to_crontab(quartz_expression)
  rescue StandardError => e
    Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{quartz_expression}) to crontab.")
    return false
  end

  # Don't sync if the scheduling interval is non-parsable
  cron = Fugit::Cron.parse(crontab_expression)
  unless cron
    Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{crontab_expression} is not a valid Quartz Cron definition.")
    return false
  end

  # Sync if next trigger happens before the next poll
  upcoming_trigger = cron.next_time(Time.now)
  due = upcoming_trigger <= Time.now + @poll_interval
  Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{crontab_expression}.") if due
  due
end
|
192
|
+
|
193
|
+
# Sleeps for @poll_interval seconds, unless the interval is not positive or
# @is_shutting_down is set, in which case it returns immediately.
def sleep_for_poll_interval
  return unless @poll_interval > 0 && !@is_shutting_down

  Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
  sleep(@poll_interval)
end
|
199
|
+
|
200
|
+
# Logs an authorization failure through Utility::ExceptionTracking with a
# fixed explanatory message.
#
# @param e [Exception] the error that was rescued
def log_authorization_error(e)
  explanation = 'Could not retrieve connectors settings due to authorization error.'
  Utility::ExceptionTracking.log_exception(e, explanation)
end
|
203
|
+
|
204
|
+
# Logs an unexpected error through Utility::ExceptionTracking with a fixed
# explanatory message.
#
# @param e [Exception] the error that was rescued
def log_standard_error(e)
  explanation = 'Sync failed due to unexpected error.'
  Utility::ExceptionTracking.log_exception(e, explanation)
end
|
192
207
|
end
|
193
208
|
end
|