connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +10 -8
  3. data/lib/app/config.rb +6 -1
  4. data/lib/app/console_app.rb +1 -1
  5. data/lib/app/dispatcher.rb +18 -3
  6. data/lib/connectors/base/connector.rb +39 -22
  7. data/lib/connectors/crawler/scheduler.rb +36 -0
  8. data/lib/connectors/example/connector.rb +2 -2
  9. data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
  10. data/lib/connectors/gitlab/connector.rb +4 -4
  11. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
  12. data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
  13. data/lib/connectors/mongodb/connector.rb +66 -56
  14. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
  17. data/lib/connectors/registry.rb +1 -1
  18. data/lib/connectors/tolerable_error_helper.rb +5 -1
  19. data/lib/connectors_utility.rb +6 -3
  20. data/lib/core/configuration.rb +13 -1
  21. data/lib/core/connector_job.rb +48 -7
  22. data/lib/core/connector_settings.rb +52 -20
  23. data/lib/core/elastic_connector_actions.rb +54 -38
  24. data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
  25. data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
  26. data/lib/core/filtering/filter_validator.rb +103 -0
  27. data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
  28. data/lib/core/filtering/post_process_engine.rb +2 -2
  29. data/lib/core/filtering/processing_stage.rb +20 -0
  30. data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
  31. data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
  32. data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
  33. data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
  34. data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
  35. data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
  36. data/lib/core/filtering/transform/filter_transformer.rb +26 -0
  37. data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
  38. data/lib/core/filtering/transform/transformation_target.rb +10 -0
  39. data/lib/core/filtering/validation_job_runner.rb +1 -3
  40. data/lib/core/filtering.rb +5 -3
  41. data/lib/core/job_cleanup.rb +66 -0
  42. data/lib/core/jobs/consumer.rb +62 -64
  43. data/lib/core/jobs/producer.rb +3 -0
  44. data/lib/core/scheduler.rb +67 -52
  45. data/lib/core/sync_job_runner.rb +170 -83
  46. data/lib/core.rb +1 -0
  47. data/lib/utility/bulk_queue.rb +1 -1
  48. data/lib/utility/constants.rb +0 -2
  49. data/lib/utility/error_monitor.rb +26 -5
  50. data/lib/utility/es_client.rb +4 -0
  51. data/lib/utility/filtering.rb +4 -0
  52. metadata +32 -21
  53. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  54. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  55. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
@@ -0,0 +1,66 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core'
10
+ require 'utility/logger'
11
+
12
+ module Core
13
+ class JobCleanUp
14
+ class << self
15
+ def execute(connector_id = nil)
16
+ process_orphaned_jobs
17
+ process_idle_jobs(connector_id)
18
+ end
19
+
20
+ private
21
+
22
+ def process_orphaned_jobs
23
+ Utility::Logger.debug('Start cleaning up orphaned jobs...')
24
+ all_connectors = ConnectorSettings.fetch_all_connectors
25
+ orphaned_jobs = ConnectorJob.orphaned_jobs(all_connectors.map(&:id))
26
+ if orphaned_jobs.empty?
27
+ Utility::Logger.debug('No orphaned jobs found. Skipping...')
28
+ return
29
+ end
30
+
31
+ # delete content indices in case they are re-created by sync job
32
+ content_indices = (orphaned_jobs.map(&:index_name) - all_connectors.map(&:index_name)).compact.uniq
33
+ ElasticConnectorActions.delete_indices(content_indices) if content_indices.any?
34
+ result = ConnectorJob.delete_jobs(orphaned_jobs)
35
+ Utility::Logger.error("Error found when deleting jobs: #{result['failures']}") if result['failures']&.any?
36
+ Utility::Logger.info("Successfully deleted #{result['deleted']} out of #{result['total']} orphaned jobs.")
37
+ end
38
+
39
+ def process_idle_jobs(connector_id = nil)
40
+ Utility::Logger.debug("Start cleaning up idle jobs for #{connector_id ? "connector #{connector_id}" : 'native connectors'}...")
41
+ idle_jobs = ConnectorJob.idle_jobs(connector_id)
42
+ if idle_jobs.empty?
43
+ Utility::Logger.debug('No idle jobs found. Skipping...')
44
+ return
45
+ end
46
+
47
+ marked_count = 0
48
+ idle_jobs.each do |job|
49
+ job.error!('The job has not seen any update for some time.')
50
+ Utility::Logger.debug("Successfully marked job #{job.id} as error.")
51
+
52
+ job_id = job.id
53
+ job = ConnectorJob.fetch_by_id(job_id)
54
+ Utility::Logger.warn("Could not found job by id #{job_id}") if job.nil?
55
+ Utility::Logger.warn("Could not found connector by id #{job.connector_id}") if job && job.connector.nil?
56
+
57
+ job&.connector&.update_last_sync!(job)
58
+ marked_count += 1
59
+ rescue StandardError => e
60
+ Utility::ExceptionTracking.log_exception(e)
61
+ end
62
+ Utility::Logger.info("Successfully marked #{marked_count} out of #{idle_jobs.count} idle jobs as error.")
63
+ end
64
+ end
65
+ end
66
+ end
@@ -6,7 +6,11 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
+ require 'utility/logger'
9
10
  require 'utility/constants'
11
+ require 'core/connector_job'
12
+ require 'core/sync_job_runner'
13
+ require 'concurrent'
10
14
 
11
15
  module Core
12
16
  module Jobs
@@ -30,90 +34,39 @@ module Core
30
34
 
31
35
  @max_ingestion_queue_size = max_ingestion_queue_size
32
36
  @max_ingestion_queue_bytes = max_ingestion_queue_bytes
33
-
34
- @running = Concurrent::AtomicBoolean.new(false)
35
37
  end
36
38
 
37
39
  def subscribe!(index_name:)
38
- @index_name = index_name
40
+ Utility::Logger.info("Starting a new consumer for #{index_name} index")
39
41
 
40
- start_loop!
42
+ @index_name = index_name
43
+ start_timer_task!
44
+ start_thread_pool!
41
45
  end
42
46
 
43
47
  def running?
44
- # @TODO check if a loop thread is alive
45
- pool.running? && @running.true?
48
+ pool&.running? && timer_task&.running?
46
49
  end
47
50
 
48
51
  def shutdown!
49
52
  Utility::Logger.info("Shutting down consumer for #{@index_name} index")
50
- @running.make_false
53
+
54
+ timer_task.shutdown
51
55
  pool.shutdown
52
56
  pool.wait_for_termination(@termination_timeout)
53
- # reset pool
54
- @pool = nil
57
+ reset_pool!
55
58
  end
56
59
 
57
60
  private
58
61
 
59
- def start_loop!
60
- Utility::Logger.info("Starting a new consumer for #{@index_name} index")
61
-
62
- Thread.new do
63
- # assign a name to the thread
64
- # see @TODO in #self.running?
65
- Thread.current[:name] = "consumer-group-#{@index_name}"
66
-
67
- loop do
68
- if @running.false?
69
- Utility::Logger.info('Shutting down the loop')
70
- break
71
- end
72
-
73
- sleep(@poll_interval)
74
- Utility::Logger.debug('Getting registered connectors')
75
-
76
- connectors = ready_for_sync_connectors
77
- next unless connectors.any?
78
-
79
- Utility::Logger.debug("Number of available connectors: #{connectors.size}")
80
-
81
- # @TODO It is assumed that @index_name is used to retrieve pending jobs.
82
- # This will be discussed after 8.6 release
83
- pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
84
- Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
85
-
86
- pending_jobs.each do |job|
87
- connector_settings = connectors[job.connector_id]
88
-
89
- pool.post do
90
- Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
91
- Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
92
- job_runner = Core::SyncJobRunner.new(
93
- connector_settings,
94
- job,
95
- @max_ingestion_queue_size,
96
- @max_ingestion_queue_bytes
97
- )
98
- job_runner.execute
99
- rescue Core::JobAlreadyRunningError
100
- Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
101
- rescue Core::ConnectorVersionChangedError => e
102
- Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
103
- rescue StandardError => e
104
- Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
105
- end
106
- end
107
- rescue StandardError => e
108
- Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
109
- end
110
- end
62
+ attr_reader :pool, :timer_task
111
63
 
112
- @running.make_true
64
+ def start_timer_task!
65
+ @timer_task = Concurrent::TimerTask.execute(execution_interval: @poll_interval, run_now: true) { execute }
113
66
  end
114
67
 
115
- def pool
116
- @pool ||= Concurrent::ThreadPoolExecutor.new(
68
+ def start_thread_pool!
69
+ @pool = Concurrent::ThreadPoolExecutor.new(
117
70
  min_threads: @min_threads,
118
71
  max_threads: @max_threads,
119
72
  max_queue: @max_queue,
@@ -122,6 +75,51 @@ module Core
122
75
  )
123
76
  end
124
77
 
78
+ def reset_pool!
79
+ @pool = nil
80
+ end
81
+
82
+ def execute
83
+ Utility::Logger.debug('Getting registered connectors')
84
+
85
+ connectors = ready_for_sync_connectors
86
+ return unless connectors.any?
87
+
88
+ Utility::Logger.debug("Number of available connectors: #{connectors.size}")
89
+
90
+ # @TODO It is assumed that @index_name is used to retrieve pending jobs.
91
+ # This will be discussed after 8.6 release
92
+ pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
93
+ Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
94
+
95
+ pending_jobs.each do |job|
96
+ connector_settings = connectors[job.connector_id]
97
+ execute_job(job, connector_settings)
98
+ end
99
+ rescue StandardError => e
100
+ Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
101
+ end
102
+
103
+ def execute_job(job, connector_settings)
104
+ pool.post do
105
+ Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
106
+ Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
107
+ job_runner = Core::SyncJobRunner.new(
108
+ connector_settings,
109
+ job,
110
+ @max_ingestion_queue_size,
111
+ @max_ingestion_queue_bytes
112
+ )
113
+ job_runner.execute
114
+ rescue Core::JobAlreadyRunningError
115
+ Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
116
+ rescue Core::ConnectorVersionChangedError => e
117
+ Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
118
+ rescue StandardError => e
119
+ Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
120
+ end
121
+ end
122
+
125
123
  def ready_for_sync_connectors
126
124
  @scheduler.connector_settings
127
125
  .select(&:ready_for_sync?)
@@ -6,6 +6,9 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
+ require 'core/connector_settings'
10
+ require 'core/elastic_connector_actions'
11
+
9
12
  module Core
10
13
  module Jobs
11
14
  class Producer
@@ -44,17 +44,14 @@ module Core
44
44
  end
45
45
  end
46
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
47
- Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
+ log_authorization_error(e)
48
48
  rescue StandardError => e
49
- Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
+ log_standard_error(e)
50
50
  ensure
51
51
  if @is_shutting_down
52
52
  break
53
53
  end
54
- if @poll_interval > 0 && !@is_shutting_down
55
- Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
56
- sleep(@poll_interval)
57
- end
54
+ sleep_for_poll_interval
58
55
  end
59
56
  end
60
57
 
@@ -78,56 +75,12 @@ module Core
78
75
  end
79
76
 
80
77
  # Sync when sync_now flag is true for the connector
81
- if connector_settings[:sync_now] == true
78
+ if connector_settings.sync_now?
82
79
  Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
83
80
  return true
84
81
  end
85
82
 
86
- # Don't sync if sync is explicitly disabled
87
- scheduling_settings = connector_settings.scheduling_settings
88
- unless scheduling_settings.present? && scheduling_settings[:enabled] == true
89
- Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
90
- return false
91
- end
92
-
93
- # We want to sync when sync never actually happened
94
- last_synced = connector_settings[:last_synced]
95
- if last_synced.nil? || last_synced.empty?
96
- Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
97
- return true
98
- end
99
-
100
- current_schedule = scheduling_settings[:interval]
101
-
102
- # Don't sync if there is no actual scheduling interval
103
- if current_schedule.nil? || current_schedule.empty?
104
- Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
105
- return false
106
- end
107
-
108
- current_schedule = begin
109
- Utility::Cron.quartz_to_crontab(current_schedule)
110
- rescue StandardError => e
111
- Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
112
- return false
113
- end
114
- cron_parser = Fugit::Cron.parse(current_schedule)
115
-
116
- # Don't sync if the scheduling interval is non-parsable
117
- unless cron_parser
118
- Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
119
- return false
120
- end
121
-
122
- next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
123
-
124
- # Sync if next trigger for the connector is in past
125
- if next_trigger_time < Time.now
126
- Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
127
- return true
128
- end
129
-
130
- false
83
+ schedule_triggered?(connector_settings.full_sync_scheduling, connector_settings.formatted)
131
84
  end
132
85
 
133
86
  def heartbeat_triggered?(connector_settings)
@@ -148,6 +101,12 @@ module Core
148
101
  end
149
102
 
150
103
  def filtering_validation_triggered?(connector_settings)
104
+ unless connector_settings.any_filtering_feature_enabled?
105
+ Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
106
+
107
+ return false
108
+ end
109
+
151
110
  filtering = connector_settings.filtering
152
111
 
153
112
  unless filtering.present?
@@ -189,5 +148,61 @@ module Core
189
148
  false
190
149
  end
191
150
  end
151
+
152
+ def schedule_triggered?(scheduling_settings, identifier)
153
+ # Don't sync if sync is explicitly disabled
154
+ unless scheduling_settings.present? && scheduling_settings[:enabled] == true
155
+ Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
156
+ return false
157
+ end
158
+
159
+ current_schedule = scheduling_settings[:interval]
160
+
161
+ # Don't sync if there is no actual scheduling interval
162
+ if current_schedule.nil? || current_schedule.empty?
163
+ Utility::Logger.warn("No sync schedule configured for #{identifier}.")
164
+ return false
165
+ end
166
+
167
+ current_schedule =
168
+ begin
169
+ Utility::Cron.quartz_to_crontab(current_schedule)
170
+ rescue StandardError => e
171
+ Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
172
+ return false
173
+ end
174
+ cron_parser = Fugit::Cron.parse(current_schedule)
175
+
176
+ # Don't sync if the scheduling interval is non-parsable
177
+ unless cron_parser
178
+ Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
179
+ return false
180
+ end
181
+
182
+ next_trigger_time = cron_parser.next_time(Time.now)
183
+
184
+ # Sync if next trigger happens before the next poll
185
+ if next_trigger_time <= Time.now + @poll_interval
186
+ Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
187
+ return true
188
+ end
189
+
190
+ false
191
+ end
192
+
193
+ def sleep_for_poll_interval
194
+ if @poll_interval > 0 && !@is_shutting_down
195
+ Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
196
+ sleep(@poll_interval)
197
+ end
198
+ end
199
+
200
+ def log_authorization_error(e)
201
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
202
+ end
203
+
204
+ def log_standard_error(e)
205
+ Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
206
+ end
192
207
  end
193
208
  end