connectors_utility 8.7.0.0.pre.20221117T004939Z → 8.7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
4
- data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
3
+ metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
4
+ data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
5
5
  SHA512:
6
- metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
7
- data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
6
+ metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
7
+ data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
@@ -22,11 +22,47 @@ module Connectors
22
22
  []
23
23
  end
24
24
 
25
+ def when_triggered
26
+ loop do
27
+ connector_settings.each do |cs|
28
+ # crawler only supports :sync
29
+ if sync_triggered?(cs)
30
+ yield cs, :sync, nil
31
+ next
32
+ end
33
+
34
+ schedule_key = custom_schedule_triggered(cs)
35
+ yield cs, :sync, schedule_key if schedule_key
36
+ end
37
+ rescue *Utility::AUTHORIZATION_ERRORS => e
38
+ log_authorization_error(e)
39
+ rescue StandardError => e
40
+ log_standard_error(e)
41
+ ensure
42
+ if @is_shutting_down
43
+ break
44
+ end
45
+ sleep_for_poll_interval
46
+ end
47
+ end
48
+
25
49
  private
26
50
 
27
51
  def connector_registered?(service_type)
28
52
  service_type == 'elastic-crawler'
29
53
  end
54
+
55
+ # custom scheduling has no ordering, so the first-found schedule is returned
56
+ def custom_schedule_triggered(cs)
57
+ cs.custom_scheduling_settings.each do |key, custom_scheduling|
58
+ identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59
+ if schedule_triggered?(custom_scheduling, identifier)
60
+ return key
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
30
66
  end
31
67
  end
32
68
  end
@@ -0,0 +1,14 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Connectors
10
+ class JobTriggerMethod
11
+ ON_DEMAND = 'on_demand'
12
+ SCHEDULED = 'scheduled'
13
+ end
14
+ end
@@ -9,8 +9,11 @@
9
9
  require_relative 'utility'
10
10
 
11
11
  require_relative 'connectors/connector_status'
12
+ require_relative 'connectors/crawler/scheduler'
13
+ require_relative 'connectors/job_trigger_method'
12
14
  require_relative 'connectors/sync_status'
13
- require_relative 'core/scheduler'
15
+ require_relative 'core/connector_job'
16
+ require_relative 'core/connector_settings'
14
17
  require_relative 'core/elastic_connector_actions'
15
-
16
- require_relative 'connectors/crawler/scheduler'
18
+ require_relative 'core/filtering/validation_status'
19
+ require_relative 'core/scheduler'
@@ -0,0 +1,251 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash/indifferent_access'
10
+ require 'connectors/sync_status'
11
+ require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
13
+ require 'utility'
14
+
15
+ module Core
16
+ class ConnectorJob
17
+ DEFAULT_PAGE_SIZE = 100
18
+ IDLE_THRESHOLD = 60
19
+
20
+ def self.fetch_by_id(job_id)
21
+ es_response = ElasticConnectorActions.get_job(job_id)
22
+ return nil unless es_response[:found]
23
+
24
+ new(es_response)
25
+ end
26
+
27
+ def self.pending_jobs(connectors_ids: [], page_size: DEFAULT_PAGE_SIZE)
28
+ status_term = { status: Connectors::SyncStatus::PENDING_STATUSES }
29
+
30
+ query = { bool: { must: [{ terms: status_term }] } }
31
+
32
+ return fetch_jobs_by_query(query, page_size) if connectors_ids.empty?
33
+
34
+ query[:bool][:must] << { terms: { 'connector.id' => connectors_ids } }
35
+
36
+ fetch_jobs_by_query(query, page_size)
37
+ end
38
+
39
+ def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
40
+ query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
41
+ fetch_jobs_by_query(query, page_size)
42
+ end
43
+
44
+ def self.delete_jobs(jobs)
45
+ query = { terms: { '_id': jobs.map(&:id) } }
46
+ ElasticConnectorActions.delete_jobs_by_query(query)
47
+ end
48
+
49
+ def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
50
+ connector_ids = if connector_id
51
+ [connector_id]
52
+ else
53
+ ConnectorSettings.fetch_native_connectors.map(&:id)
54
+ end
55
+ query = {
56
+ bool: {
57
+ filter: [
58
+ { terms: { 'connector.id': connector_ids } },
59
+ { terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
60
+ { range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
61
+ ]
62
+ }
63
+ }
64
+ fetch_jobs_by_query(query, page_size)
65
+ end
66
+
67
+ def self.enqueue(_connector_id)
68
+ nil
69
+ end
70
+
71
+ def id
72
+ @elasticsearch_response[:_id]
73
+ end
74
+
75
+ def [](property_name)
76
+ @elasticsearch_response[:_source][property_name]
77
+ end
78
+
79
+ def error
80
+ self[:error]
81
+ end
82
+
83
+ def status
84
+ self[:status]
85
+ end
86
+
87
+ def in_progress?
88
+ status == Connectors::SyncStatus::IN_PROGRESS
89
+ end
90
+
91
+ def canceling?
92
+ status == Connectors::SyncStatus::CANCELING
93
+ end
94
+
95
+ def suspended?
96
+ status == Connectors::SyncStatus::SUSPENDED
97
+ end
98
+
99
+ def canceled?
100
+ status == Connectors::SyncStatus::CANCELED
101
+ end
102
+
103
+ def pending?
104
+ Connectors::SyncStatus::PENDING_STATUSES.include?(status)
105
+ end
106
+
107
+ def active?
108
+ Connectors::SyncStatus::ACTIVE_STATUSES.include?(status)
109
+ end
110
+
111
+ def terminated?
112
+ Connectors::SyncStatus::TERMINAL_STATUSES.include?(status)
113
+ end
114
+
115
+ def connector_snapshot
116
+ self[:connector] || {}
117
+ end
118
+
119
+ def connector_id
120
+ connector_snapshot[:id]
121
+ end
122
+
123
+ def index_name
124
+ connector_snapshot[:index_name]
125
+ end
126
+
127
+ def language
128
+ connector_snapshot[:language]
129
+ end
130
+
131
+ def service_type
132
+ connector_snapshot[:service_type]
133
+ end
134
+
135
+ def configuration
136
+ connector_snapshot[:configuration]
137
+ end
138
+
139
+ def filtering
140
+ connector_snapshot[:filtering]
141
+ end
142
+
143
+ def pipeline
144
+ connector_snapshot[:pipeline] || {}
145
+ end
146
+
147
+ def extract_binary_content?
148
+ pipeline[:extract_binary_content]
149
+ end
150
+
151
+ def reduce_whitespace?
152
+ pipeline[:reduce_whitespace]
153
+ end
154
+
155
+ def run_ml_inference?
156
+ pipeline[:run_ml_inference]
157
+ end
158
+
159
+ def connector
160
+ @connector ||= ConnectorSettings.fetch_by_id(connector_id)
161
+ end
162
+
163
+ def update_metadata(ingestion_stats = {}, connector_metadata = {})
164
+ ingestion_stats ||= {}
165
+ doc = { :last_seen => Time.now }.merge(ingestion_stats)
166
+ doc[:metadata] = connector_metadata if connector_metadata&.any?
167
+ ElasticConnectorActions.update_job_fields(id, doc)
168
+ end
169
+
170
+ def done!(ingestion_stats = {}, connector_metadata = {})
171
+ terminate!(Connectors::SyncStatus::COMPLETED, nil, ingestion_stats, connector_metadata)
172
+ end
173
+
174
+ def error!(message, ingestion_stats = {}, connector_metadata = {})
175
+ terminate!(Connectors::SyncStatus::ERROR, message, ingestion_stats, connector_metadata)
176
+ end
177
+
178
+ def cancel!(ingestion_stats = {}, connector_metadata = {})
179
+ terminate!(Connectors::SyncStatus::CANCELED, nil, ingestion_stats, connector_metadata)
180
+ end
181
+
182
+ def with_concurrency_control
183
+ response = ElasticConnectorActions.get_job(id)
184
+
185
+ yield response, response['_seq_no'], response['_primary_term']
186
+ end
187
+
188
+ def make_running!
189
+ with_concurrency_control do |es_doc, seq_no, primary_term|
190
+ now = Time.now
191
+ doc = {
192
+ status: Connectors::SyncStatus::IN_PROGRESS,
193
+ started_at: now,
194
+ last_seen: now,
195
+ worker_hostname: Socket.gethostname
196
+ }
197
+
198
+ ElasticConnectorActions.update_job_fields(es_doc[:_id], doc, seq_no, primary_term)
199
+ end
200
+ end
201
+
202
+ def es_source
203
+ @elasticsearch_response[:_source]
204
+ end
205
+
206
+ private
207
+
208
+ def self.fetch_jobs_by_query(query, page_size)
209
+ results = []
210
+ offset = 0
211
+ loop do
212
+ response = ElasticConnectorActions.search_jobs(query, page_size, offset)
213
+
214
+ hits = response.dig('hits', 'hits') || []
215
+ total = response.dig('hits', 'total', 'value') || 0
216
+ results += hits.map { |hit| new(hit) }
217
+ break if results.size >= total
218
+ offset += hits.size
219
+ end
220
+
221
+ results
222
+ end
223
+
224
+ def initialize(es_response)
225
+ # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
226
+ @elasticsearch_response = es_response.with_indifferent_access
227
+ end
228
+
229
+ def terminate!(status, error = nil, ingestion_stats = {}, connector_metadata = {})
230
+ ingestion_stats ||= {}
231
+ ingestion_stats[:total_document_count] = ElasticConnectorActions.document_count(index_name)
232
+ doc = {
233
+ :last_seen => Time.now,
234
+ :completed_at => Time.now,
235
+ :status => status,
236
+ :error => error
237
+ }.merge(ingestion_stats)
238
+ doc[:canceled_at] = Time.now if status == Connectors::SyncStatus::CANCELED
239
+ doc[:metadata] = connector_metadata if connector_metadata&.any?
240
+ ElasticConnectorActions.update_job_fields(id, doc)
241
+ end
242
+
243
+ def seq_no
244
+ @elasticsearch_response[:_seq_no]
245
+ end
246
+
247
+ def primary_term
248
+ @elasticsearch_response[:_primary_term]
249
+ end
250
+ end
251
+ end
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/sync_status'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -49,6 +50,11 @@ module Core
49
50
  fetch_connectors_by_query(query, page_size)
50
51
  end
51
52
 
53
+ def self.fetch_all_connectors(page_size = DEFAULT_PAGE_SIZE)
54
+ query = { match_all: {} }
55
+ fetch_connectors_by_query(query, page_size)
56
+ end
57
+
52
58
  def id
53
59
  @elasticsearch_response[:_id]
54
60
  end
@@ -58,6 +64,24 @@ module Core
58
64
  @elasticsearch_response[:_source][property_name]
59
65
  end
60
66
 
67
+ def features
68
+ self[:features] || {}
69
+ end
70
+
71
+ # .dig version is the modern features way of doing things,
72
+ # Right-hand of OR operator is legacy features support
73
+ # When this is fixed with a migration, we can go ahead and drop the legacy lookup
74
+ def filtering_rule_feature_enabled?
75
+ !!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
76
+ end
77
+ def filtering_advanced_config_feature_enabled?
78
+ !!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
79
+ end
80
+
81
+ def any_filtering_feature_enabled?
82
+ filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
83
+ end
84
+
61
85
  def index_name
62
86
  self[:index_name]
63
87
  end
@@ -82,6 +106,18 @@ module Core
82
106
  self[:scheduling]
83
107
  end
84
108
 
109
+ def custom_scheduling_settings
110
+ self[:custom_scheduling]
111
+ end
112
+
113
+ def sync_now?
114
+ self[:sync_now] == true
115
+ end
116
+
117
+ def last_synced
118
+ self[:last_synced]
119
+ end
120
+
85
121
  def filtering
86
122
  # assume for now, that first object in filtering array or a filter object itself is the only filtering object
87
123
  filtering = @elasticsearch_response.dig(:_source, :filtering)
@@ -93,18 +129,6 @@ module Core
93
129
  Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
94
130
  end
95
131
 
96
- def extract_binary_content?
97
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
98
- end
99
-
100
- def reduce_whitespace?
101
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
102
- end
103
-
104
- def run_ml_inference?
105
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
106
- end
107
-
108
132
  def formatted
109
133
  properties = ["ID: #{id}"]
110
134
  properties << "Service type: #{service_type}" if service_type
@@ -130,19 +154,23 @@ module Core
130
154
  end
131
155
 
132
156
  def update_last_sync!(job)
157
+ # if job is nil, the connector still needs to be updated, to avoid it getting stuck at in_progress
158
+ job_status = job&.status || Connectors::SyncStatus::ERROR
159
+ job_error = job.nil? ? 'Could\'t find the job' : job.error
160
+ job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
161
+ connector_status = (job_status == Connectors::SyncStatus::ERROR ? Connectors::ConnectorStatus::ERROR : Connectors::ConnectorStatus::CONNECTED)
133
162
  doc = {
134
- :last_sync_status => job.status,
163
+ :last_sync_status => job_status,
135
164
  :last_synced => Time.now,
136
- :last_sync_error => job.error,
137
- :error => job.error
165
+ :last_sync_error => job_error,
166
+ :status => connector_status,
167
+ :error => job_error
138
168
  }
139
-
140
- if job.terminated?
169
+ if job&.terminated?
141
170
  doc[:last_indexed_document_count] = job[:indexed_document_count]
142
171
  doc[:last_deleted_document_count] = job[:deleted_document_count]
143
172
  end
144
-
145
- Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
173
+ Core::ElasticConnectorActions.update_connector_fields(id, doc)
146
174
  end
147
175
 
148
176
  private
@@ -8,6 +8,7 @@
8
8
  #
9
9
  require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/job_trigger_method'
11
12
  require 'connectors/sync_status'
12
13
  require 'utility'
13
14
  require 'elastic-transport'
@@ -91,6 +92,17 @@ module Core
91
92
  )
92
93
  end
93
94
 
95
+ def delete_jobs_by_query(query)
96
+ client.delete_by_query(
97
+ :index => Utility::Constants::JOB_INDEX,
98
+ :body => { :query => query }
99
+ )
100
+ end
101
+
102
+ def delete_indices(indices)
103
+ client.indices.delete(:index => indices, :ignore_unavailable => true)
104
+ end
105
+
94
106
  def update_connector_configuration(connector_id, configuration)
95
107
  update_connector_fields(connector_id, :configuration => configuration)
96
108
  end
@@ -145,12 +157,37 @@ module Core
145
157
  )
146
158
  end
147
159
 
148
- def update_connector_last_sync_status(connector_id, last_sync_status)
160
+ def update_connector_sync_start(connector_id)
161
+ doc = connector_with_concurrency_control(connector_id)
162
+
163
+ body = {
164
+ last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
165
+ last_sync_error: nil,
166
+ status: Connectors::ConnectorStatus::CONNECTED
167
+ }
168
+
169
+ update_connector_fields(
170
+ connector_id,
171
+ body,
172
+ doc[:seq_no],
173
+ doc[:primary_term]
174
+ )
175
+ end
176
+
177
+ def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
149
178
  doc = connector_with_concurrency_control(connector_id)
150
179
 
180
+ body = {
181
+ :custom_scheduling => {
182
+ schedule_key => {
183
+ :last_synced => Time.now
184
+ }
185
+ }
186
+ }
187
+
151
188
  update_connector_fields(
152
189
  connector_id,
153
- { last_sync_status: last_sync_status },
190
+ body,
154
191
  doc[:seq_no],
155
192
  doc[:primary_term]
156
193
  )
@@ -178,13 +215,15 @@ module Core
178
215
  status: Connectors::SyncStatus::PENDING,
179
216
  created_at: Time.now,
180
217
  last_seen: Time.now,
218
+ trigger_method: connector_settings.sync_now? ? Connectors::JobTriggerMethod::ON_DEMAND : Connectors::JobTriggerMethod::SCHEDULED,
181
219
  connector: {
182
220
  id: connector_settings.id,
183
221
  filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
222
  index_name: connector_settings.index_name,
185
223
  language: connector_settings[:language],
186
224
  pipeline: connector_settings[:pipeline],
187
- service_type: connector_settings.service_type
225
+ service_type: connector_settings.service_type,
226
+ configuration: connector_settings.configuration
188
227
  }
189
228
  }
190
229
 
@@ -220,37 +259,6 @@ module Core
220
259
  update_connector_fields(connector_id, body)
221
260
  end
222
261
 
223
- def update_sync(job_id, metadata)
224
- body = {
225
- :doc => { :last_seen => Time.now }.merge(metadata)
226
- }
227
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
228
- end
229
-
230
- def complete_sync(connector_id, job_id, metadata, error)
231
- sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
232
-
233
- metadata ||= {}
234
-
235
- update_connector_fields(connector_id,
236
- :last_sync_status => sync_status,
237
- :last_sync_error => error,
238
- :error => error,
239
- :last_synced => Time.now,
240
- :last_indexed_document_count => metadata[:indexed_document_count],
241
- :last_deleted_document_count => metadata[:deleted_document_count])
242
-
243
- body = {
244
- :doc => {
245
- :status => sync_status,
246
- :completed_at => Time.now,
247
- :last_seen => Time.now,
248
- :error => error
249
- }.merge(metadata)
250
- }
251
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
252
- end
253
-
254
262
  def fetch_document_ids(index_name)
255
263
  page_size = 1000
256
264
  result = []
@@ -331,9 +339,11 @@ module Core
331
339
  # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
332
340
  def ensure_connectors_index_exists
333
341
  mappings = {
342
+ :dynamic => false,
334
343
  :properties => {
335
344
  :api_key_id => { :type => :keyword },
336
345
  :configuration => { :type => :object },
346
+ :custom_schedule => { :type => :object },
337
347
  :description => { :type => :text },
338
348
  :error => { :type => :keyword },
339
349
  :features => {
@@ -451,6 +461,7 @@ module Core
451
461
  # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
452
462
  def ensure_job_index_exists
453
463
  mappings = {
464
+ :dynamic => false,
454
465
  :properties => {
455
466
  :cancelation_requested_at => { :type => :date },
456
467
  :canceled_at => { :type => :date },
@@ -528,8 +539,8 @@ module Core
528
539
  end
529
540
 
530
541
  def document_count(index_name)
531
- client.indices.refresh(:index => index_name)
532
- client.count(:index => index_name)['count']
542
+ client.indices.refresh(:index => index_name, :ignore_unavailable => true)
543
+ client.count(:index => index_name, :ignore_unavailable => true)['count']
533
544
  end
534
545
 
535
546
  private
@@ -44,17 +44,14 @@ module Core
44
44
  end
45
45
  end
46
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
47
- Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
+ log_authorization_error(e)
48
48
  rescue StandardError => e
49
- Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
+ log_standard_error(e)
50
50
  ensure
51
51
  if @is_shutting_down
52
52
  break
53
53
  end
54
- if @poll_interval > 0 && !@is_shutting_down
55
- Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
56
- sleep(@poll_interval)
57
- end
54
+ sleep_for_poll_interval
58
55
  end
59
56
  end
60
57
 
@@ -78,56 +75,12 @@ module Core
78
75
  end
79
76
 
80
77
  # Sync when sync_now flag is true for the connector
81
- if connector_settings[:sync_now] == true
78
+ if connector_settings.sync_now?
82
79
  Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
83
80
  return true
84
81
  end
85
82
 
86
- # Don't sync if sync is explicitly disabled
87
- scheduling_settings = connector_settings.scheduling_settings
88
- unless scheduling_settings.present? && scheduling_settings[:enabled] == true
89
- Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
90
- return false
91
- end
92
-
93
- # We want to sync when sync never actually happened
94
- last_synced = connector_settings[:last_synced]
95
- if last_synced.nil? || last_synced.empty?
96
- Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
97
- return true
98
- end
99
-
100
- current_schedule = scheduling_settings[:interval]
101
-
102
- # Don't sync if there is no actual scheduling interval
103
- if current_schedule.nil? || current_schedule.empty?
104
- Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
105
- return false
106
- end
107
-
108
- current_schedule = begin
109
- Utility::Cron.quartz_to_crontab(current_schedule)
110
- rescue StandardError => e
111
- Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
112
- return false
113
- end
114
- cron_parser = Fugit::Cron.parse(current_schedule)
115
-
116
- # Don't sync if the scheduling interval is non-parsable
117
- unless cron_parser
118
- Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
119
- return false
120
- end
121
-
122
- next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
123
-
124
- # Sync if next trigger for the connector is in past
125
- if next_trigger_time < Time.now
126
- Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
127
- return true
128
- end
129
-
130
- false
83
+ schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
131
84
  end
132
85
 
133
86
  def heartbeat_triggered?(connector_settings)
@@ -148,6 +101,12 @@ module Core
148
101
  end
149
102
 
150
103
  def filtering_validation_triggered?(connector_settings)
104
+ unless connector_settings.any_filtering_feature_enabled?
105
+ Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
106
+
107
+ return false
108
+ end
109
+
151
110
  filtering = connector_settings.filtering
152
111
 
153
112
  unless filtering.present?
@@ -189,5 +148,61 @@ module Core
189
148
  false
190
149
  end
191
150
  end
151
+
152
+ def schedule_triggered?(scheduling_settings, identifier)
153
+ # Don't sync if sync is explicitly disabled
154
+ unless scheduling_settings.present? && scheduling_settings[:enabled] == true
155
+ Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
156
+ return false
157
+ end
158
+
159
+ current_schedule = scheduling_settings[:interval]
160
+
161
+ # Don't sync if there is no actual scheduling interval
162
+ if current_schedule.nil? || current_schedule.empty?
163
+ Utility::Logger.warn("No sync schedule configured for #{identifier}.")
164
+ return false
165
+ end
166
+
167
+ current_schedule =
168
+ begin
169
+ Utility::Cron.quartz_to_crontab(current_schedule)
170
+ rescue StandardError => e
171
+ Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
172
+ return false
173
+ end
174
+ cron_parser = Fugit::Cron.parse(current_schedule)
175
+
176
+ # Don't sync if the scheduling interval is non-parsable
177
+ unless cron_parser
178
+ Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
179
+ return false
180
+ end
181
+
182
+ next_trigger_time = cron_parser.next_time(Time.now)
183
+
184
+ # Sync if next trigger happens before the next poll
185
+ if next_trigger_time <= Time.now + @poll_interval
186
+ Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
187
+ return true
188
+ end
189
+
190
+ false
191
+ end
192
+
193
+ def sleep_for_poll_interval
194
+ if @poll_interval > 0 && !@is_shutting_down
195
+ Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
196
+ sleep(@poll_interval)
197
+ end
198
+ end
199
+
200
+ def log_authorization_error(e)
201
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
202
+ end
203
+
204
+ def log_standard_error(e)
205
+ Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
206
+ end
192
207
  end
193
208
  end
@@ -13,7 +13,7 @@ module Utility
13
13
  class QueueOverflowError < StandardError; end
14
14
 
15
15
  # 500 items or 5MB
16
- def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
16
+ def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
17
17
  @operation_count_threshold = operation_count_threshold.freeze
18
18
  @size_threshold = size_threshold.freeze
19
19
 
@@ -16,8 +16,6 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
- FILTERING_RULES_FEATURE = 'filtering_rules'
20
- FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
21
19
 
22
20
  # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
23
21
  DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
@@ -51,7 +51,7 @@ module Utility
51
51
  def note_success
52
52
  @consecutive_error_count = 0
53
53
  @success_count += 1
54
- increment_window_index
54
+ track_window_error(false)
55
55
  end
56
56
 
57
57
  def note_error(error, id: Time.now.to_i)
@@ -60,10 +60,9 @@ module Utility
60
60
  Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
61
61
  @total_error_count += 1
62
62
  @consecutive_error_count += 1
63
- @window_errors[@window_index] = true
64
63
  @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
65
64
  @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
66
- increment_window_index
65
+ track_window_error(true)
67
66
  @last_error = error
68
67
 
69
68
  raise_if_necessary
@@ -92,10 +91,32 @@ module Utility
92
91
  end
93
92
 
94
93
  def num_errors_in_window
95
- @window_errors.count(&:itself).to_f
94
+ @window_errors.count(true).to_f
96
95
  end
97
96
 
98
- def increment_window_index
97
+ def track_window_error(is_error)
98
+ # We keep the errors array at a fixed size of @window_size. For example, imagine @window_size = 5
99
+ # Error array inits as falses:
100
+ # [ false, false, false, false, false ]
101
+ # Third document raises an error:
102
+ # [ false, false, true, false, false ]
103
+ # ^^^^
104
+ # 2 % 5 == 2
105
+ # Fifth document raises an error:
106
+ # [ false, false, true, false, true ]
107
+ # ^^^^
108
+ # 4 % 5 == 4
109
+ # Sixth document raises an error:
110
+ # [ true, false, true, false, true ]
111
+ # ^^^^
112
+ # 5 % 5 == 0
113
+ #
114
+ # Eighth document is successful:
115
+ # [ true, false, false, false, true ]
116
+ # ^^^^^
117
+ # 7 % 5 == 2
118
+ # And so on.
119
+ @window_errors[@window_index] = is_error
99
120
  @window_index = (@window_index + 1) % @window_size
100
121
  end
101
122
 
@@ -43,6 +43,10 @@ module Utility
43
43
  configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
44
  configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
45
 
46
+ # headers
47
+ # these are necessary for cloud-hosted native connectors
48
+ configs[:headers] = es_config[:headers].to_h if es_config[:headers]
49
+
46
50
  # if log or trace is activated, we use the application logger
47
51
  configs[:logger] = if configs[:log] || configs[:trace]
48
52
  Utility::Logger.logger
@@ -17,6 +17,10 @@ module Utility
17
17
 
18
18
  filter.present? ? filter : {}
19
19
  end
20
+
21
+ def rule_pre_processing_active?(filter)
22
+ !filter.dig('advanced_snippet', 'value')&.present?
23
+ end
20
24
  end
21
25
  end
22
26
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.7.0.0.pre.20221117T004939Z
4
+ version: 8.7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-17 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.2.6
19
+ version: '5.2'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.2.6
26
+ version: '5.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: ecs-logging
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -104,8 +104,10 @@ files:
104
104
  - NOTICE.txt
105
105
  - lib/connectors/connector_status.rb
106
106
  - lib/connectors/crawler/scheduler.rb
107
+ - lib/connectors/job_trigger_method.rb
107
108
  - lib/connectors/sync_status.rb
108
109
  - lib/connectors_utility.rb
110
+ - lib/core/connector_job.rb
109
111
  - lib/core/connector_settings.rb
110
112
  - lib/core/elastic_connector_actions.rb
111
113
  - lib/core/filtering/validation_status.rb
@@ -130,9 +132,9 @@ homepage: https://github.com/elastic/connectors-ruby
130
132
  licenses:
131
133
  - Elastic-2.0
132
134
  metadata:
133
- revision: 294214a26b0fe9a4347763b01de681c336e8daae
134
- repository: https://github.com/elastic/connectors-ruby.git
135
- post_install_message:
135
+ revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
136
+ repository: https://github.com/elastic/connectors-ruby
137
+ post_install_message:
136
138
  rdoc_options: []
137
139
  require_paths:
138
140
  - lib
@@ -143,12 +145,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
143
145
  version: '0'
144
146
  required_rubygems_version: !ruby/object:Gem::Requirement
145
147
  requirements:
146
- - - ">"
148
+ - - ">="
147
149
  - !ruby/object:Gem::Version
148
- version: 1.3.1
150
+ version: '0'
149
151
  requirements: []
150
152
  rubygems_version: 3.0.3.1
151
- signing_key:
153
+ signing_key:
152
154
  specification_version: 4
153
155
  summary: Gem containing shared Connector Services libraries
154
156
  test_files: []