connectors_utility 8.6.0.7 → 8.7.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d2406276300699ebf07cc2890f994c3a1a9fecb9127793bcad68fb69a4f49111
-  data.tar.gz: 0a35b249367837148c2f7fe2eac30f74de37c5fd5610f30859c703bfbcbb76be
+  metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
+  data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
 SHA512:
-  metadata.gz: e0cca6acc1faffd3312213d807e77425ed006f44313c3b49faf01c8f3b31817ef7a194249581d6058cc8309773fa1142f5daf8994a2a8e24533f1b3f08f73efd
-  data.tar.gz: '078ebf6f05133d0b3ffe252157e72bcfaee89fb98e65d8ffde1a400c69a95f1ea4edfd7dfd66a626955aa24a0e29217cb6f51ead3e30c1c1d2792d52c7ffbb33'
+  metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
+  data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
@@ -22,11 +22,47 @@ module Connectors
         []
       end
 
+      def when_triggered
+        loop do
+          connector_settings.each do |cs|
+            # crawler only supports :sync
+            if sync_triggered?(cs)
+              yield cs, :sync, nil
+              next
+            end
+
+            schedule_key = custom_schedule_triggered(cs)
+            yield cs, :sync, schedule_key if schedule_key
+          end
+        rescue *Utility::AUTHORIZATION_ERRORS => e
+          log_authorization_error(e)
+        rescue StandardError => e
+          log_standard_error(e)
+        ensure
+          if @is_shutting_down
+            break
+          end
+          sleep_for_poll_interval
+        end
+      end
+
       private
 
       def connector_registered?(service_type)
         service_type == 'elastic-crawler'
       end
+
+      # custom scheduling has no ordering, so the first-found schedule is returned
+      def custom_schedule_triggered(cs)
+        cs.custom_scheduling_settings.each do |key, custom_scheduling|
+          identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
+          if schedule_triggered?(custom_scheduling, identifier)
+            return key
+          end
+        end
+
+        nil
+      end
     end
   end
 end
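The new when_triggered loop yields three values: the connector settings, the task (always :sync for the crawler), and an optional custom-schedule key. A minimal sketch of a consumer, assuming a scheduler instance built elsewhere; the start_sync helper is illustrative and not part of this diff:

  # scheduler is assumed to be an instance of the crawler Scheduler shown above
  scheduler.when_triggered do |connector_settings, task, schedule_key|
    next unless task == :sync

    # schedule_key is nil for the regular schedule and carries the custom
    # scheduling key when a custom schedule fired
    start_sync(connector_settings, schedule_key)
  end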
@@ -15,7 +15,7 @@ require 'utility'
 module Core
   class ConnectorJob
     DEFAULT_PAGE_SIZE = 100
-    STUCK_THRESHOLD = 60
+    IDLE_THRESHOLD = 60
 
     def self.fetch_by_id(job_id)
       es_response = ElasticConnectorActions.get_job(job_id)
@@ -36,8 +36,7 @@ module Core
       fetch_jobs_by_query(query, page_size)
     end
 
-    def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
-      connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
+    def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
       query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
       fetch_jobs_by_query(query, page_size)
     end
@@ -47,7 +46,7 @@ module Core
       ElasticConnectorActions.delete_jobs_by_query(query)
     end
 
-    def self.stuck_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
+    def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
       connector_ids = if connector_id
                         [connector_id]
                       else
@@ -58,7 +57,7 @@ module Core
         filter: [
           { terms: { 'connector.id': connector_ids } },
           { terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
-          { range: { last_seen: { lte: "now-#{STUCK_THRESHOLD}s" } } }
+          { range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
         ]
       }
     }
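orphaned_jobs no longer looks up connector IDs itself; callers pass them in, so the query building no longer does its own Elasticsearch round-trip. A sketch of the new call shape, reusing the lookup that was removed from the method body (the Core:: namespacing is assumed):

  connector_ids = Core::ConnectorSettings.fetch_all_connectors.map(&:id)
  orphaned = Core::ConnectorJob.orphaned_jobs(connector_ids)

  # stuck_jobs is renamed to idle_jobs; the signature is unchanged and the
  # 60-second threshold now lives in IDLE_THRESHOLD
  idle = Core::ConnectorJob.idle_jobs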
@@ -64,6 +64,24 @@ module Core
       @elasticsearch_response[:_source][property_name]
     end
 
+    def features
+      self[:features] || {}
+    end
+
+    # .dig version is the modern features way of doing things,
+    # Right-hand of OR operator is legacy features support
+    # When this is fixed with a migration, we can go ahead
+    def filtering_rule_feature_enabled?
+      !!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
+    end
+    def filtering_advanced_config_feature_enabled?
+      !!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
+    end
+
+    def any_filtering_feature_enabled?
+      filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
+    end
+
     def index_name
       self[:index_name]
     end
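The new feature predicates accept both document shapes. A short illustration of the two _source[:features] layouts they resolve, with field names taken from the methods above:

  # modern shape: nested sync_rules flags read via Hash#dig
  features = { :sync_rules => { :basic => { :enabled => true }, :advanced => { :enabled => false } } }

  # legacy shape: flat boolean flags, still honoured by the right-hand side of the OR
  features = { :filtering_rules => true, :filtering_advanced_config => false }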
@@ -88,10 +106,18 @@ module Core
       self[:scheduling]
     end
 
+    def custom_scheduling_settings
+      self[:custom_scheduling]
+    end
+
     def sync_now?
       self[:sync_now] == true
     end
 
+    def last_synced
+      self[:last_synced]
+    end
+
     def filtering
       # assume for now, that first object in filtering array or a filter object itself is the only filtering object
       filtering = @elasticsearch_response.dig(:_source, :filtering)
@@ -157,12 +157,37 @@ module Core
       )
     end
 
-    def update_connector_last_sync_status(connector_id, last_sync_status)
+    def update_connector_sync_start(connector_id)
       doc = connector_with_concurrency_control(connector_id)
 
+      body = {
+        last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
+        last_sync_error: nil,
+        status: Connectors::ConnectorStatus::CONNECTED
+      }
+
       update_connector_fields(
         connector_id,
-        { last_sync_status: last_sync_status },
+        body,
+        doc[:seq_no],
+        doc[:primary_term]
+      )
+    end
+
+    def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
+      doc = connector_with_concurrency_control(connector_id)
+
+      body = {
+        :custom_scheduling => {
+          schedule_key => {
+            :last_synced => Time.now
+          }
+        }
+      }
+
+      update_connector_fields(
+        connector_id,
+        body,
         doc[:seq_no],
         doc[:primary_term]
       )
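The two new helpers bracket a triggered sync: update_connector_sync_start claims the run (last_sync_status goes to in_progress, the connector is marked connected, and any previous sync error is cleared), while update_connector_custom_scheduling_last_synced records a per-key timestamp for custom schedules. A hedged usage sketch; the surrounding runner code is an assumption:

  Core::ElasticConnectorActions.update_connector_sync_start(connector_id)

  # ... run the sync job ...

  # only custom-scheduled runs carry a schedule_key
  if schedule_key
    Core::ElasticConnectorActions.update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
  end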
@@ -314,9 +339,11 @@ module Core
     # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
     def ensure_connectors_index_exists
       mappings = {
+        :dynamic => false,
         :properties => {
           :api_key_id => { :type => :keyword },
           :configuration => { :type => :object },
+          :custom_schedule => { :type => :object },
           :description => { :type => :text },
           :error => { :type => :keyword },
           :features => {
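Setting :dynamic => false on the index mappings tells Elasticsearch to keep unmapped fields in _source without adding them to the mapping, so stray fields no longer mutate the index schema. The fragment above reduces to a mapping of this shape:

  mappings = {
    :dynamic => false,
    :properties => {
      :custom_schedule => { :type => :object }
      # ... remaining connector fields ...
    }
  }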
@@ -434,6 +461,7 @@ module Core
     # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
     def ensure_job_index_exists
       mappings = {
+        :dynamic => false,
         :properties => {
           :cancelation_requested_at => { :type => :date },
           :canceled_at => { :type => :date },
@@ -511,8 +539,8 @@ module Core
     end
 
     def document_count(index_name)
-      client.indices.refresh(:index => index_name)
-      client.count(:index => index_name)['count']
+      client.indices.refresh(:index => index_name, :ignore_unavailable => true)
+      client.count(:index => index_name, :ignore_unavailable => true)['count']
     end
 
     private
@@ -44,17 +44,14 @@ module Core
          end
        end
      rescue *Utility::AUTHORIZATION_ERRORS => e
-       Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
+       log_authorization_error(e)
      rescue StandardError => e
-       Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
+       log_standard_error(e)
      ensure
        if @is_shutting_down
          break
        end
-       if @poll_interval > 0 && !@is_shutting_down
-         Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
-         sleep(@poll_interval)
-       end
+       sleep_for_poll_interval
      end
    end
 
@@ -83,51 +80,7 @@ module Core
        return true
      end
 
-      # Don't sync if sync is explicitly disabled
-      scheduling_settings = connector_settings.scheduling_settings
-      unless scheduling_settings.present? && scheduling_settings[:enabled] == true
-        Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
-        return false
-      end
-
-      current_schedule = scheduling_settings[:interval]
-
-      # Don't sync if there is no actual scheduling interval
-      if current_schedule.nil? || current_schedule.empty?
-        Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
-        return false
-      end
-
-      current_schedule = begin
-        Utility::Cron.quartz_to_crontab(current_schedule)
-      rescue StandardError => e
-        Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
-        return false
-      end
-      cron_parser = Fugit::Cron.parse(current_schedule)
-
-      # Don't sync if the scheduling interval is non-parsable
-      unless cron_parser
-        Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
-        return false
-      end
-
-      # We want to sync when sync never actually happened
-      last_synced = connector_settings[:last_synced]
-      if last_synced.nil? || last_synced.empty?
-        Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
-        return true
-      end
-
-      next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
-
-      # Sync if next trigger for the connector is in past
-      if next_trigger_time < Time.now
-        Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
-        return true
-      end
-
-      false
+      schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
     end
 
     def heartbeat_triggered?(connector_settings)
@@ -148,6 +101,12 @@ module Core
     end
 
     def filtering_validation_triggered?(connector_settings)
+      unless connector_settings.any_filtering_feature_enabled?
+        Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
+
+        return false
+      end
+
       filtering = connector_settings.filtering
 
       unless filtering.present?
@@ -189,5 +148,61 @@ module Core
        false
      end
    end
+
+    def schedule_triggered?(scheduling_settings, identifier)
+      # Don't sync if sync is explicitly disabled
+      unless scheduling_settings.present? && scheduling_settings[:enabled] == true
+        Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
+        return false
+      end
+
+      current_schedule = scheduling_settings[:interval]
+
+      # Don't sync if there is no actual scheduling interval
+      if current_schedule.nil? || current_schedule.empty?
+        Utility::Logger.warn("No sync schedule configured for #{identifier}.")
+        return false
+      end
+
+      current_schedule =
+        begin
+          Utility::Cron.quartz_to_crontab(current_schedule)
+        rescue StandardError => e
+          Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
+          return false
+        end
+      cron_parser = Fugit::Cron.parse(current_schedule)
+
+      # Don't sync if the scheduling interval is non-parsable
+      unless cron_parser
+        Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
+        return false
+      end
+
+      next_trigger_time = cron_parser.next_time(Time.now)
+
+      # Sync if next trigger happens before the next poll
+      if next_trigger_time <= Time.now + @poll_interval
+        Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
+        return true
+      end
+
+      false
+    end
+
+    def sleep_for_poll_interval
+      if @poll_interval > 0 && !@is_shutting_down
+        Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
+        sleep(@poll_interval)
+      end
+    end
+
+    def log_authorization_error(e)
+      Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
+    end
+
+    def log_standard_error(e)
+      Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
+    end
   end
 end
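The trigger check changed direction: instead of asking whether the next run after last_synced is already in the past, schedule_triggered? asks whether the next run from now falls inside the upcoming poll window. A small sketch of that comparison using fugit, as in the code above; the 60-second poll interval is an illustrative value:

  require 'fugit'

  poll_interval = 60 # seconds; illustrative
  cron_parser = Fugit::Cron.parse('0 0 * * *') # crontab form, i.e. after quartz_to_crontab
  next_trigger_time = cron_parser.next_time(Time.now)

  # fires on the poll whose window contains the next scheduled run
  triggered = next_trigger_time <= Time.now + poll_interval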
@@ -16,8 +16,6 @@ module Utility
     JOB_INDEX = '.elastic-connectors-sync-jobs'
     CONTENT_INDEX_PREFIX = 'search-'
     CRAWLER_SERVICE_TYPE = 'elastic-crawler'
-    FILTERING_RULES_FEATURE = 'filtering_rules'
-    FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
 
     # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
     DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
@@ -43,6 +43,10 @@ module Utility
      configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
      configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
 
+      # headers
+      # these are necessary for cloud-hosted native connectors
+      configs[:headers] = es_config[:headers].to_h if es_config[:headers]
+
      # if log or trace is activated, we use the application logger
      configs[:logger] = if configs[:log] || configs[:trace]
                           Utility::Logger.logger
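With the headers passthrough, an elasticsearch config section that carries custom headers now reaches the client. A minimal sketch; the header name and the other keys shown are illustrative, not taken from this diff:

  es_config = {
    :ca_fingerprint => '64F2593F...',
    :transport_options => { :ssl => { :verify => false } },
    :headers => { 'X-Found-Cluster' => 'cluster-id' } # illustrative header
  }

  configs[:headers] = es_config[:headers].to_h if es_config[:headers]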
metadata CHANGED
@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: connectors_utility
 version: !ruby/object:Gem::Version
-  version: 8.6.0.7
+  version: 8.7.0.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-11-29 00:00:00.000000000 Z
+date: 2023-02-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 5.2.6
+        version: '5.2'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 5.2.6
+        version: '5.2'
 - !ruby/object:Gem::Dependency
   name: ecs-logging
   requirement: !ruby/object:Gem::Requirement
@@ -132,7 +132,7 @@ homepage: https://github.com/elastic/connectors-ruby
 licenses:
 - Elastic-2.0
 metadata:
-  revision: 86b6c162f41d3a837841d0df2430cbc26b57eb33
+  revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
   repository: https://github.com/elastic/connectors-ruby
 post_install_message:
 rdoc_options: []