connectors_utility 8.6.0.7 → 8.7.0.0

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2406276300699ebf07cc2890f994c3a1a9fecb9127793bcad68fb69a4f49111
4
- data.tar.gz: 0a35b249367837148c2f7fe2eac30f74de37c5fd5610f30859c703bfbcbb76be
3
+ metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
4
+ data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
5
5
  SHA512:
6
- metadata.gz: e0cca6acc1faffd3312213d807e77425ed006f44313c3b49faf01c8f3b31817ef7a194249581d6058cc8309773fa1142f5daf8994a2a8e24533f1b3f08f73efd
7
- data.tar.gz: '078ebf6f05133d0b3ffe252157e72bcfaee89fb98e65d8ffde1a400c69a95f1ea4edfd7dfd66a626955aa24a0e29217cb6f51ead3e30c1c1d2792d52c7ffbb33'
6
+ metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
7
+ data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
@@ -22,11 +22,47 @@ module Connectors
22
22
  []
23
23
  end
24
24
 
25
+ def when_triggered
26
+ loop do
27
+ connector_settings.each do |cs|
28
+ # crawler only supports :sync
29
+ if sync_triggered?(cs)
30
+ yield cs, :sync, nil
31
+ next
32
+ end
33
+
34
+ schedule_key = custom_schedule_triggered(cs)
35
+ yield cs, :sync, schedule_key if schedule_key
36
+ end
37
+ rescue *Utility::AUTHORIZATION_ERRORS => e
38
+ log_authorization_error(e)
39
+ rescue StandardError => e
40
+ log_standard_error(e)
41
+ ensure
42
+ if @is_shutting_down
43
+ break
44
+ end
45
+ sleep_for_poll_interval
46
+ end
47
+ end
48
+
25
49
  private
26
50
 
27
51
  def connector_registered?(service_type)
28
52
  service_type == 'elastic-crawler'
29
53
  end
54
+
55
+ # custom scheduling has no ordering, so the first-found schedule is returned
56
+ def custom_schedule_triggered(cs)
57
+ cs.custom_scheduling_settings.each do |key, custom_scheduling|
58
+ identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59
+ if schedule_triggered?(custom_scheduling, identifier)
60
+ return key
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
30
66
  end
31
67
  end
32
68
  end
@@ -15,7 +15,7 @@ require 'utility'
15
15
  module Core
16
16
  class ConnectorJob
17
17
  DEFAULT_PAGE_SIZE = 100
18
- STUCK_THRESHOLD = 60
18
+ IDLE_THRESHOLD = 60
19
19
 
20
20
  def self.fetch_by_id(job_id)
21
21
  es_response = ElasticConnectorActions.get_job(job_id)
@@ -36,8 +36,7 @@ module Core
36
36
  fetch_jobs_by_query(query, page_size)
37
37
  end
38
38
 
39
- def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
40
- connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
39
+ def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
41
40
  query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
42
41
  fetch_jobs_by_query(query, page_size)
43
42
  end
@@ -47,7 +46,7 @@ module Core
47
46
  ElasticConnectorActions.delete_jobs_by_query(query)
48
47
  end
49
48
 
50
- def self.stuck_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
49
+ def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
51
50
  connector_ids = if connector_id
52
51
  [connector_id]
53
52
  else
@@ -58,7 +57,7 @@ module Core
58
57
  filter: [
59
58
  { terms: { 'connector.id': connector_ids } },
60
59
  { terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
61
- { range: { last_seen: { lte: "now-#{STUCK_THRESHOLD}s" } } }
60
+ { range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
62
61
  ]
63
62
  }
64
63
  }
@@ -64,6 +64,24 @@ module Core
64
64
  @elasticsearch_response[:_source][property_name]
65
65
  end
66
66
 
67
+ def features
68
+ self[:features] || {}
69
+ end
70
+
71
+ # .dig version is the modern features way of doing things,
72
+ # Right-hand of OR operator is legacy features support
73
+ # When this is fixed with a migration, we can go ahead
74
+ def filtering_rule_feature_enabled?
75
+ !!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
76
+ end
77
+ def filtering_advanced_config_feature_enabled?
78
+ !!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
79
+ end
80
+
81
+ def any_filtering_feature_enabled?
82
+ filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
83
+ end
84
+
67
85
  def index_name
68
86
  self[:index_name]
69
87
  end
@@ -88,10 +106,18 @@ module Core
88
106
  self[:scheduling]
89
107
  end
90
108
 
109
+ def custom_scheduling_settings
110
+ self[:custom_scheduling]
111
+ end
112
+
91
113
  def sync_now?
92
114
  self[:sync_now] == true
93
115
  end
94
116
 
117
+ def last_synced
118
+ self[:last_synced]
119
+ end
120
+
95
121
  def filtering
96
122
  # assume for now, that first object in filtering array or a filter object itself is the only filtering object
97
123
  filtering = @elasticsearch_response.dig(:_source, :filtering)
@@ -157,12 +157,37 @@ module Core
157
157
  )
158
158
  end
159
159
 
160
- def update_connector_last_sync_status(connector_id, last_sync_status)
160
+ def update_connector_sync_start(connector_id)
161
161
  doc = connector_with_concurrency_control(connector_id)
162
162
 
163
+ body = {
164
+ last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
165
+ last_sync_error: nil,
166
+ status: Connectors::ConnectorStatus::CONNECTED
167
+ }
168
+
163
169
  update_connector_fields(
164
170
  connector_id,
165
- { last_sync_status: last_sync_status },
171
+ body,
172
+ doc[:seq_no],
173
+ doc[:primary_term]
174
+ )
175
+ end
176
+
177
+ def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
178
+ doc = connector_with_concurrency_control(connector_id)
179
+
180
+ body = {
181
+ :custom_scheduling => {
182
+ schedule_key => {
183
+ :last_synced => Time.now
184
+ }
185
+ }
186
+ }
187
+
188
+ update_connector_fields(
189
+ connector_id,
190
+ body,
166
191
  doc[:seq_no],
167
192
  doc[:primary_term]
168
193
  )
@@ -314,9 +339,11 @@ module Core
314
339
  # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
315
340
  def ensure_connectors_index_exists
316
341
  mappings = {
342
+ :dynamic => false,
317
343
  :properties => {
318
344
  :api_key_id => { :type => :keyword },
319
345
  :configuration => { :type => :object },
346
+ :custom_schedule => { :type => :object },
320
347
  :description => { :type => :text },
321
348
  :error => { :type => :keyword },
322
349
  :features => {
@@ -434,6 +461,7 @@ module Core
434
461
  # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
435
462
  def ensure_job_index_exists
436
463
  mappings = {
464
+ :dynamic => false,
437
465
  :properties => {
438
466
  :cancelation_requested_at => { :type => :date },
439
467
  :canceled_at => { :type => :date },
@@ -511,8 +539,8 @@ module Core
511
539
  end
512
540
 
513
541
  def document_count(index_name)
514
- client.indices.refresh(:index => index_name)
515
- client.count(:index => index_name)['count']
542
+ client.indices.refresh(:index => index_name, :ignore_unavailable => true)
543
+ client.count(:index => index_name, :ignore_unavailable => true)['count']
516
544
  end
517
545
 
518
546
  private
@@ -44,17 +44,14 @@ module Core
44
44
  end
45
45
  end
46
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
47
- Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
+ log_authorization_error(e)
48
48
  rescue StandardError => e
49
- Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
+ log_standard_error(e)
50
50
  ensure
51
51
  if @is_shutting_down
52
52
  break
53
53
  end
54
- if @poll_interval > 0 && !@is_shutting_down
55
- Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
56
- sleep(@poll_interval)
57
- end
54
+ sleep_for_poll_interval
58
55
  end
59
56
  end
60
57
 
@@ -83,51 +80,7 @@ module Core
83
80
  return true
84
81
  end
85
82
 
86
- # Don't sync if sync is explicitly disabled
87
- scheduling_settings = connector_settings.scheduling_settings
88
- unless scheduling_settings.present? && scheduling_settings[:enabled] == true
89
- Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
90
- return false
91
- end
92
-
93
- current_schedule = scheduling_settings[:interval]
94
-
95
- # Don't sync if there is no actual scheduling interval
96
- if current_schedule.nil? || current_schedule.empty?
97
- Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
98
- return false
99
- end
100
-
101
- current_schedule = begin
102
- Utility::Cron.quartz_to_crontab(current_schedule)
103
- rescue StandardError => e
104
- Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
105
- return false
106
- end
107
- cron_parser = Fugit::Cron.parse(current_schedule)
108
-
109
- # Don't sync if the scheduling interval is non-parsable
110
- unless cron_parser
111
- Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
112
- return false
113
- end
114
-
115
- # We want to sync when sync never actually happened
116
- last_synced = connector_settings[:last_synced]
117
- if last_synced.nil? || last_synced.empty?
118
- Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
119
- return true
120
- end
121
-
122
- next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
123
-
124
- # Sync if next trigger for the connector is in past
125
- if next_trigger_time < Time.now
126
- Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
127
- return true
128
- end
129
-
130
- false
83
+ schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
131
84
  end
132
85
 
133
86
  def heartbeat_triggered?(connector_settings)
@@ -148,6 +101,12 @@ module Core
148
101
  end
149
102
 
150
103
  def filtering_validation_triggered?(connector_settings)
104
+ unless connector_settings.any_filtering_feature_enabled?
105
+ Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
106
+
107
+ return false
108
+ end
109
+
151
110
  filtering = connector_settings.filtering
152
111
 
153
112
  unless filtering.present?
@@ -189,5 +148,61 @@ module Core
189
148
  false
190
149
  end
191
150
  end
151
+
152
+ def schedule_triggered?(scheduling_settings, identifier)
153
+ # Don't sync if sync is explicitly disabled
154
+ unless scheduling_settings.present? && scheduling_settings[:enabled] == true
155
+ Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
156
+ return false
157
+ end
158
+
159
+ current_schedule = scheduling_settings[:interval]
160
+
161
+ # Don't sync if there is no actual scheduling interval
162
+ if current_schedule.nil? || current_schedule.empty?
163
+ Utility::Logger.warn("No sync schedule configured for #{identifier}.")
164
+ return false
165
+ end
166
+
167
+ current_schedule =
168
+ begin
169
+ Utility::Cron.quartz_to_crontab(current_schedule)
170
+ rescue StandardError => e
171
+ Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
172
+ return false
173
+ end
174
+ cron_parser = Fugit::Cron.parse(current_schedule)
175
+
176
+ # Don't sync if the scheduling interval is non-parsable
177
+ unless cron_parser
178
+ Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
179
+ return false
180
+ end
181
+
182
+ next_trigger_time = cron_parser.next_time(Time.now)
183
+
184
+ # Sync if next trigger happens before the next poll
185
+ if next_trigger_time <= Time.now + @poll_interval
186
+ Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
187
+ return true
188
+ end
189
+
190
+ false
191
+ end
192
+
193
+ def sleep_for_poll_interval
194
+ if @poll_interval > 0 && !@is_shutting_down
195
+ Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
196
+ sleep(@poll_interval)
197
+ end
198
+ end
199
+
200
+ def log_authorization_error(e)
201
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
202
+ end
203
+
204
+ def log_standard_error(e)
205
+ Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
206
+ end
192
207
  end
193
208
  end
@@ -16,8 +16,6 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
- FILTERING_RULES_FEATURE = 'filtering_rules'
20
- FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
21
19
 
22
20
  # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
23
21
  DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
@@ -43,6 +43,10 @@ module Utility
43
43
  configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
44
  configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
45
 
46
+ # headers
47
+ # these are necessary for cloud-hosted native connectors
48
+ configs[:headers] = es_config[:headers].to_h if es_config[:headers]
49
+
46
50
  # if log or trace is activated, we use the application logger
47
51
  configs[:logger] = if configs[:log] || configs[:trace]
48
52
  Utility::Logger.logger
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.7
4
+ version: 8.7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-29 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.2.6
19
+ version: '5.2'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.2.6
26
+ version: '5.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: ecs-logging
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -132,7 +132,7 @@ homepage: https://github.com/elastic/connectors-ruby
132
132
  licenses:
133
133
  - Elastic-2.0
134
134
  metadata:
135
- revision: 86b6c162f41d3a837841d0df2430cbc26b57eb33
135
+ revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
136
136
  repository: https://github.com/elastic/connectors-ruby
137
137
  post_install_message:
138
138
  rdoc_options: []