connectors_utility 8.7.0.0.pre.20221117T004939Z → 8.7.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
4
- data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
3
+ metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
4
+ data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
5
5
  SHA512:
6
- metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
7
- data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
6
+ metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
7
+ data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
@@ -22,11 +22,47 @@ module Connectors
22
22
  []
23
23
  end
24
24
 
25
+ def when_triggered
26
+ loop do
27
+ connector_settings.each do |cs|
28
+ # crawler only supports :sync
29
+ if sync_triggered?(cs)
30
+ yield cs, :sync, nil
31
+ next
32
+ end
33
+
34
+ schedule_key = custom_schedule_triggered(cs)
35
+ yield cs, :sync, schedule_key if schedule_key
36
+ end
37
+ rescue *Utility::AUTHORIZATION_ERRORS => e
38
+ log_authorization_error(e)
39
+ rescue StandardError => e
40
+ log_standard_error(e)
41
+ ensure
42
+ if @is_shutting_down
43
+ break
44
+ end
45
+ sleep_for_poll_interval
46
+ end
47
+ end
48
+
25
49
  private
26
50
 
27
51
  def connector_registered?(service_type)
28
52
  service_type == 'elastic-crawler'
29
53
  end
54
+
55
+ # custom scheduling has no ordering, so the first-found schedule is returned
56
+ def custom_schedule_triggered(cs)
57
+ cs.custom_scheduling_settings.each do |key, custom_scheduling|
58
+ identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59
+ if schedule_triggered?(custom_scheduling, identifier)
60
+ return key
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
30
66
  end
31
67
  end
32
68
  end
@@ -0,0 +1,14 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Connectors
10
+ class JobTriggerMethod
11
+ ON_DEMAND = 'on_demand'
12
+ SCHEDULED = 'scheduled'
13
+ end
14
+ end
@@ -9,8 +9,11 @@
9
9
  require_relative 'utility'
10
10
 
11
11
  require_relative 'connectors/connector_status'
12
+ require_relative 'connectors/crawler/scheduler'
13
+ require_relative 'connectors/job_trigger_method'
12
14
  require_relative 'connectors/sync_status'
13
- require_relative 'core/scheduler'
15
+ require_relative 'core/connector_job'
16
+ require_relative 'core/connector_settings'
14
17
  require_relative 'core/elastic_connector_actions'
15
-
16
- require_relative 'connectors/crawler/scheduler'
18
+ require_relative 'core/filtering/validation_status'
19
+ require_relative 'core/scheduler'
@@ -0,0 +1,251 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash/indifferent_access'
10
+ require 'connectors/sync_status'
11
+ require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
13
+ require 'utility'
14
+
15
+ module Core
16
+ class ConnectorJob
17
+ DEFAULT_PAGE_SIZE = 100
18
+ IDLE_THRESHOLD = 60
19
+
20
+ def self.fetch_by_id(job_id)
21
+ es_response = ElasticConnectorActions.get_job(job_id)
22
+ return nil unless es_response[:found]
23
+
24
+ new(es_response)
25
+ end
26
+
27
+ def self.pending_jobs(connectors_ids: [], page_size: DEFAULT_PAGE_SIZE)
28
+ status_term = { status: Connectors::SyncStatus::PENDING_STATUSES }
29
+
30
+ query = { bool: { must: [{ terms: status_term }] } }
31
+
32
+ return fetch_jobs_by_query(query, page_size) if connectors_ids.empty?
33
+
34
+ query[:bool][:must] << { terms: { 'connector.id' => connectors_ids } }
35
+
36
+ fetch_jobs_by_query(query, page_size)
37
+ end
38
+
39
+ def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
40
+ query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
41
+ fetch_jobs_by_query(query, page_size)
42
+ end
43
+
44
+ def self.delete_jobs(jobs)
45
+ query = { terms: { '_id': jobs.map(&:id) } }
46
+ ElasticConnectorActions.delete_jobs_by_query(query)
47
+ end
48
+
49
+ def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
50
+ connector_ids = if connector_id
51
+ [connector_id]
52
+ else
53
+ ConnectorSettings.fetch_native_connectors.map(&:id)
54
+ end
55
+ query = {
56
+ bool: {
57
+ filter: [
58
+ { terms: { 'connector.id': connector_ids } },
59
+ { terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
60
+ { range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
61
+ ]
62
+ }
63
+ }
64
+ fetch_jobs_by_query(query, page_size)
65
+ end
66
+
67
+ def self.enqueue(_connector_id)
68
+ nil
69
+ end
70
+
71
+ def id
72
+ @elasticsearch_response[:_id]
73
+ end
74
+
75
+ def [](property_name)
76
+ @elasticsearch_response[:_source][property_name]
77
+ end
78
+
79
+ def error
80
+ self[:error]
81
+ end
82
+
83
+ def status
84
+ self[:status]
85
+ end
86
+
87
+ def in_progress?
88
+ status == Connectors::SyncStatus::IN_PROGRESS
89
+ end
90
+
91
+ def canceling?
92
+ status == Connectors::SyncStatus::CANCELING
93
+ end
94
+
95
+ def suspended?
96
+ status == Connectors::SyncStatus::SUSPENDED
97
+ end
98
+
99
+ def canceled?
100
+ status == Connectors::SyncStatus::CANCELED
101
+ end
102
+
103
+ def pending?
104
+ Connectors::SyncStatus::PENDING_STATUSES.include?(status)
105
+ end
106
+
107
+ def active?
108
+ Connectors::SyncStatus::ACTIVE_STATUSES.include?(status)
109
+ end
110
+
111
+ def terminated?
112
+ Connectors::SyncStatus::TERMINAL_STATUSES.include?(status)
113
+ end
114
+
115
+ def connector_snapshot
116
+ self[:connector] || {}
117
+ end
118
+
119
+ def connector_id
120
+ connector_snapshot[:id]
121
+ end
122
+
123
+ def index_name
124
+ connector_snapshot[:index_name]
125
+ end
126
+
127
+ def language
128
+ connector_snapshot[:language]
129
+ end
130
+
131
+ def service_type
132
+ connector_snapshot[:service_type]
133
+ end
134
+
135
+ def configuration
136
+ connector_snapshot[:configuration]
137
+ end
138
+
139
+ def filtering
140
+ connector_snapshot[:filtering]
141
+ end
142
+
143
+ def pipeline
144
+ connector_snapshot[:pipeline] || {}
145
+ end
146
+
147
+ def extract_binary_content?
148
+ pipeline[:extract_binary_content]
149
+ end
150
+
151
+ def reduce_whitespace?
152
+ pipeline[:reduce_whitespace]
153
+ end
154
+
155
+ def run_ml_inference?
156
+ pipeline[:run_ml_inference]
157
+ end
158
+
159
+ def connector
160
+ @connector ||= ConnectorSettings.fetch_by_id(connector_id)
161
+ end
162
+
163
+ def update_metadata(ingestion_stats = {}, connector_metadata = {})
164
+ ingestion_stats ||= {}
165
+ doc = { :last_seen => Time.now }.merge(ingestion_stats)
166
+ doc[:metadata] = connector_metadata if connector_metadata&.any?
167
+ ElasticConnectorActions.update_job_fields(id, doc)
168
+ end
169
+
170
+ def done!(ingestion_stats = {}, connector_metadata = {})
171
+ terminate!(Connectors::SyncStatus::COMPLETED, nil, ingestion_stats, connector_metadata)
172
+ end
173
+
174
+ def error!(message, ingestion_stats = {}, connector_metadata = {})
175
+ terminate!(Connectors::SyncStatus::ERROR, message, ingestion_stats, connector_metadata)
176
+ end
177
+
178
+ def cancel!(ingestion_stats = {}, connector_metadata = {})
179
+ terminate!(Connectors::SyncStatus::CANCELED, nil, ingestion_stats, connector_metadata)
180
+ end
181
+
182
+ def with_concurrency_control
183
+ response = ElasticConnectorActions.get_job(id)
184
+
185
+ yield response, response['_seq_no'], response['_primary_term']
186
+ end
187
+
188
+ def make_running!
189
+ with_concurrency_control do |es_doc, seq_no, primary_term|
190
+ now = Time.now
191
+ doc = {
192
+ status: Connectors::SyncStatus::IN_PROGRESS,
193
+ started_at: now,
194
+ last_seen: now,
195
+ worker_hostname: Socket.gethostname
196
+ }
197
+
198
+ ElasticConnectorActions.update_job_fields(es_doc[:_id], doc, seq_no, primary_term)
199
+ end
200
+ end
201
+
202
+ def es_source
203
+ @elasticsearch_response[:_source]
204
+ end
205
+
206
+ private
207
+
208
+ def self.fetch_jobs_by_query(query, page_size)
209
+ results = []
210
+ offset = 0
211
+ loop do
212
+ response = ElasticConnectorActions.search_jobs(query, page_size, offset)
213
+
214
+ hits = response.dig('hits', 'hits') || []
215
+ total = response.dig('hits', 'total', 'value') || 0
216
+ results += hits.map { |hit| new(hit) }
217
+ break if results.size >= total
218
+ offset += hits.size
219
+ end
220
+
221
+ results
222
+ end
223
+
224
+ def initialize(es_response)
225
+ # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
226
+ @elasticsearch_response = es_response.with_indifferent_access
227
+ end
228
+
229
+ def terminate!(status, error = nil, ingestion_stats = {}, connector_metadata = {})
230
+ ingestion_stats ||= {}
231
+ ingestion_stats[:total_document_count] = ElasticConnectorActions.document_count(index_name)
232
+ doc = {
233
+ :last_seen => Time.now,
234
+ :completed_at => Time.now,
235
+ :status => status,
236
+ :error => error
237
+ }.merge(ingestion_stats)
238
+ doc[:canceled_at] = Time.now if status == Connectors::SyncStatus::CANCELED
239
+ doc[:metadata] = connector_metadata if connector_metadata&.any?
240
+ ElasticConnectorActions.update_job_fields(id, doc)
241
+ end
242
+
243
+ def seq_no
244
+ @elasticsearch_response[:_seq_no]
245
+ end
246
+
247
+ def primary_term
248
+ @elasticsearch_response[:_primary_term]
249
+ end
250
+ end
251
+ end
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/sync_status'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -49,6 +50,11 @@ module Core
49
50
  fetch_connectors_by_query(query, page_size)
50
51
  end
51
52
 
53
+ def self.fetch_all_connectors(page_size = DEFAULT_PAGE_SIZE)
54
+ query = { match_all: {} }
55
+ fetch_connectors_by_query(query, page_size)
56
+ end
57
+
52
58
  def id
53
59
  @elasticsearch_response[:_id]
54
60
  end
@@ -58,6 +64,24 @@ module Core
58
64
  @elasticsearch_response[:_source][property_name]
59
65
  end
60
66
 
67
+ def features
68
+ self[:features] || {}
69
+ end
70
+
71
+ # .dig version is the modern features way of doing things,
72
+ # Right-hand of OR operator is legacy features support
73
+ # When this is fixed with a migration, we can go ahead
74
+ def filtering_rule_feature_enabled?
75
+ !!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
76
+ end
77
+ def filtering_advanced_config_feature_enabled?
78
+ !!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
79
+ end
80
+
81
+ def any_filtering_feature_enabled?
82
+ filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
83
+ end
84
+
61
85
  def index_name
62
86
  self[:index_name]
63
87
  end
@@ -82,6 +106,18 @@ module Core
82
106
  self[:scheduling]
83
107
  end
84
108
 
109
+ def custom_scheduling_settings
110
+ self[:custom_scheduling]
111
+ end
112
+
113
+ def sync_now?
114
+ self[:sync_now] == true
115
+ end
116
+
117
+ def last_synced
118
+ self[:last_synced]
119
+ end
120
+
85
121
  def filtering
86
122
  # assume for now, that first object in filtering array or a filter object itself is the only filtering object
87
123
  filtering = @elasticsearch_response.dig(:_source, :filtering)
@@ -93,18 +129,6 @@ module Core
93
129
  Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
94
130
  end
95
131
 
96
- def extract_binary_content?
97
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
98
- end
99
-
100
- def reduce_whitespace?
101
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
102
- end
103
-
104
- def run_ml_inference?
105
- Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
106
- end
107
-
108
132
  def formatted
109
133
  properties = ["ID: #{id}"]
110
134
  properties << "Service type: #{service_type}" if service_type
@@ -130,19 +154,23 @@ module Core
130
154
  end
131
155
 
132
156
  def update_last_sync!(job)
157
+ # if job is nil, connector still needs to be updated, to avoid it getting stuck at in_progress
158
+ job_status = job&.status || Connectors::SyncStatus::ERROR
159
+ job_error = job.nil? ? 'Could\'t find the job' : job.error
160
+ job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
161
+ connector_status = (job_status == Connectors::SyncStatus::ERROR ? Connectors::ConnectorStatus::ERROR : Connectors::ConnectorStatus::CONNECTED)
133
162
  doc = {
134
- :last_sync_status => job.status,
163
+ :last_sync_status => job_status,
135
164
  :last_synced => Time.now,
136
- :last_sync_error => job.error,
137
- :error => job.error
165
+ :last_sync_error => job_error,
166
+ :status => connector_status,
167
+ :error => job_error
138
168
  }
139
-
140
- if job.terminated?
169
+ if job&.terminated?
141
170
  doc[:last_indexed_document_count] = job[:indexed_document_count]
142
171
  doc[:last_deleted_document_count] = job[:deleted_document_count]
143
172
  end
144
-
145
- Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
173
+ Core::ElasticConnectorActions.update_connector_fields(id, doc)
146
174
  end
147
175
 
148
176
  private
@@ -8,6 +8,7 @@
8
8
  #
9
9
  require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/job_trigger_method'
11
12
  require 'connectors/sync_status'
12
13
  require 'utility'
13
14
  require 'elastic-transport'
@@ -91,6 +92,17 @@ module Core
91
92
  )
92
93
  end
93
94
 
95
+ def delete_jobs_by_query(query)
96
+ client.delete_by_query(
97
+ :index => Utility::Constants::JOB_INDEX,
98
+ :body => { :query => query }
99
+ )
100
+ end
101
+
102
+ def delete_indices(indices)
103
+ client.indices.delete(:index => indices, :ignore_unavailable => true)
104
+ end
105
+
94
106
  def update_connector_configuration(connector_id, configuration)
95
107
  update_connector_fields(connector_id, :configuration => configuration)
96
108
  end
@@ -145,12 +157,37 @@ module Core
145
157
  )
146
158
  end
147
159
 
148
- def update_connector_last_sync_status(connector_id, last_sync_status)
160
+ def update_connector_sync_start(connector_id)
161
+ doc = connector_with_concurrency_control(connector_id)
162
+
163
+ body = {
164
+ last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
165
+ last_sync_error: nil,
166
+ status: Connectors::ConnectorStatus::CONNECTED
167
+ }
168
+
169
+ update_connector_fields(
170
+ connector_id,
171
+ body,
172
+ doc[:seq_no],
173
+ doc[:primary_term]
174
+ )
175
+ end
176
+
177
+ def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
149
178
  doc = connector_with_concurrency_control(connector_id)
150
179
 
180
+ body = {
181
+ :custom_scheduling => {
182
+ schedule_key => {
183
+ :last_synced => Time.now
184
+ }
185
+ }
186
+ }
187
+
151
188
  update_connector_fields(
152
189
  connector_id,
153
- { last_sync_status: last_sync_status },
190
+ body,
154
191
  doc[:seq_no],
155
192
  doc[:primary_term]
156
193
  )
@@ -178,13 +215,15 @@ module Core
178
215
  status: Connectors::SyncStatus::PENDING,
179
216
  created_at: Time.now,
180
217
  last_seen: Time.now,
218
+ trigger_method: connector_settings.sync_now? ? Connectors::JobTriggerMethod::ON_DEMAND : Connectors::JobTriggerMethod::SCHEDULED,
181
219
  connector: {
182
220
  id: connector_settings.id,
183
221
  filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
222
  index_name: connector_settings.index_name,
185
223
  language: connector_settings[:language],
186
224
  pipeline: connector_settings[:pipeline],
187
- service_type: connector_settings.service_type
225
+ service_type: connector_settings.service_type,
226
+ configuration: connector_settings.configuration
188
227
  }
189
228
  }
190
229
 
@@ -220,37 +259,6 @@ module Core
220
259
  update_connector_fields(connector_id, body)
221
260
  end
222
261
 
223
- def update_sync(job_id, metadata)
224
- body = {
225
- :doc => { :last_seen => Time.now }.merge(metadata)
226
- }
227
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
228
- end
229
-
230
- def complete_sync(connector_id, job_id, metadata, error)
231
- sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
232
-
233
- metadata ||= {}
234
-
235
- update_connector_fields(connector_id,
236
- :last_sync_status => sync_status,
237
- :last_sync_error => error,
238
- :error => error,
239
- :last_synced => Time.now,
240
- :last_indexed_document_count => metadata[:indexed_document_count],
241
- :last_deleted_document_count => metadata[:deleted_document_count])
242
-
243
- body = {
244
- :doc => {
245
- :status => sync_status,
246
- :completed_at => Time.now,
247
- :last_seen => Time.now,
248
- :error => error
249
- }.merge(metadata)
250
- }
251
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
252
- end
253
-
254
262
  def fetch_document_ids(index_name)
255
263
  page_size = 1000
256
264
  result = []
@@ -331,9 +339,11 @@ module Core
331
339
  # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
332
340
  def ensure_connectors_index_exists
333
341
  mappings = {
342
+ :dynamic => false,
334
343
  :properties => {
335
344
  :api_key_id => { :type => :keyword },
336
345
  :configuration => { :type => :object },
346
+ :custom_schedule => { :type => :object },
337
347
  :description => { :type => :text },
338
348
  :error => { :type => :keyword },
339
349
  :features => {
@@ -451,6 +461,7 @@ module Core
451
461
  # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
452
462
  def ensure_job_index_exists
453
463
  mappings = {
464
+ :dynamic => false,
454
465
  :properties => {
455
466
  :cancelation_requested_at => { :type => :date },
456
467
  :canceled_at => { :type => :date },
@@ -528,8 +539,8 @@ module Core
528
539
  end
529
540
 
530
541
  def document_count(index_name)
531
- client.indices.refresh(:index => index_name)
532
- client.count(:index => index_name)['count']
542
+ client.indices.refresh(:index => index_name, :ignore_unavailable => true)
543
+ client.count(:index => index_name, :ignore_unavailable => true)['count']
533
544
  end
534
545
 
535
546
  private
@@ -44,17 +44,14 @@ module Core
44
44
  end
45
45
  end
46
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
47
- Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
+ log_authorization_error(e)
48
48
  rescue StandardError => e
49
- Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
+ log_standard_error(e)
50
50
  ensure
51
51
  if @is_shutting_down
52
52
  break
53
53
  end
54
- if @poll_interval > 0 && !@is_shutting_down
55
- Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
56
- sleep(@poll_interval)
57
- end
54
+ sleep_for_poll_interval
58
55
  end
59
56
  end
60
57
 
@@ -78,56 +75,12 @@ module Core
78
75
  end
79
76
 
80
77
  # Sync when sync_now flag is true for the connector
81
- if connector_settings[:sync_now] == true
78
+ if connector_settings.sync_now?
82
79
  Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
83
80
  return true
84
81
  end
85
82
 
86
- # Don't sync if sync is explicitly disabled
87
- scheduling_settings = connector_settings.scheduling_settings
88
- unless scheduling_settings.present? && scheduling_settings[:enabled] == true
89
- Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
90
- return false
91
- end
92
-
93
- # We want to sync when sync never actually happened
94
- last_synced = connector_settings[:last_synced]
95
- if last_synced.nil? || last_synced.empty?
96
- Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
97
- return true
98
- end
99
-
100
- current_schedule = scheduling_settings[:interval]
101
-
102
- # Don't sync if there is no actual scheduling interval
103
- if current_schedule.nil? || current_schedule.empty?
104
- Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
105
- return false
106
- end
107
-
108
- current_schedule = begin
109
- Utility::Cron.quartz_to_crontab(current_schedule)
110
- rescue StandardError => e
111
- Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
112
- return false
113
- end
114
- cron_parser = Fugit::Cron.parse(current_schedule)
115
-
116
- # Don't sync if the scheduling interval is non-parsable
117
- unless cron_parser
118
- Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
119
- return false
120
- end
121
-
122
- next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
123
-
124
- # Sync if next trigger for the connector is in past
125
- if next_trigger_time < Time.now
126
- Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
127
- return true
128
- end
129
-
130
- false
83
+ schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
131
84
  end
132
85
 
133
86
  def heartbeat_triggered?(connector_settings)
@@ -148,6 +101,12 @@ module Core
148
101
  end
149
102
 
150
103
  def filtering_validation_triggered?(connector_settings)
104
+ unless connector_settings.any_filtering_feature_enabled?
105
+ Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
106
+
107
+ return false
108
+ end
109
+
151
110
  filtering = connector_settings.filtering
152
111
 
153
112
  unless filtering.present?
@@ -189,5 +148,61 @@ module Core
189
148
  false
190
149
  end
191
150
  end
151
+
152
+ def schedule_triggered?(scheduling_settings, identifier)
153
+ # Don't sync if sync is explicitly disabled
154
+ unless scheduling_settings.present? && scheduling_settings[:enabled] == true
155
+ Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
156
+ return false
157
+ end
158
+
159
+ current_schedule = scheduling_settings[:interval]
160
+
161
+ # Don't sync if there is no actual scheduling interval
162
+ if current_schedule.nil? || current_schedule.empty?
163
+ Utility::Logger.warn("No sync schedule configured for #{identifier}.")
164
+ return false
165
+ end
166
+
167
+ current_schedule =
168
+ begin
169
+ Utility::Cron.quartz_to_crontab(current_schedule)
170
+ rescue StandardError => e
171
+ Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
172
+ return false
173
+ end
174
+ cron_parser = Fugit::Cron.parse(current_schedule)
175
+
176
+ # Don't sync if the scheduling interval is non-parsable
177
+ unless cron_parser
178
+ Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
179
+ return false
180
+ end
181
+
182
+ next_trigger_time = cron_parser.next_time(Time.now)
183
+
184
+ # Sync if next trigger happens before the next poll
185
+ if next_trigger_time <= Time.now + @poll_interval
186
+ Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
187
+ return true
188
+ end
189
+
190
+ false
191
+ end
192
+
193
+ def sleep_for_poll_interval
194
+ if @poll_interval > 0 && !@is_shutting_down
195
+ Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
196
+ sleep(@poll_interval)
197
+ end
198
+ end
199
+
200
+ def log_authorization_error(e)
201
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
202
+ end
203
+
204
+ def log_standard_error(e)
205
+ Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
206
+ end
192
207
  end
193
208
  end
@@ -13,7 +13,7 @@ module Utility
13
13
  class QueueOverflowError < StandardError; end
14
14
 
15
15
  # 500 items or 5MB
16
- def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
16
+ def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
17
17
  @operation_count_threshold = operation_count_threshold.freeze
18
18
  @size_threshold = size_threshold.freeze
19
19
 
@@ -16,8 +16,6 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
- FILTERING_RULES_FEATURE = 'filtering_rules'
20
- FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
21
19
 
22
20
  # Maximum number of operations in BULK Elasticsearch operation that will ingest the data
23
21
  DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
@@ -51,7 +51,7 @@ module Utility
51
51
  def note_success
52
52
  @consecutive_error_count = 0
53
53
  @success_count += 1
54
- increment_window_index
54
+ track_window_error(false)
55
55
  end
56
56
 
57
57
  def note_error(error, id: Time.now.to_i)
@@ -60,10 +60,9 @@ module Utility
60
60
  Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
61
61
  @total_error_count += 1
62
62
  @consecutive_error_count += 1
63
- @window_errors[@window_index] = true
64
63
  @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
65
64
  @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
66
- increment_window_index
65
+ track_window_error(true)
67
66
  @last_error = error
68
67
 
69
68
  raise_if_necessary
@@ -92,10 +91,32 @@ module Utility
92
91
  end
93
92
 
94
93
  def num_errors_in_window
95
- @window_errors.count(&:itself).to_f
94
+ @window_errors.count(true).to_f
96
95
  end
97
96
 
98
- def increment_window_index
97
+ def track_window_error(is_error)
98
+ # We keep the errors array of the size @window_size this way, imagine @window_size = 5
99
+ # Error array inits as falses:
100
+ # [ false, false, false, false, false ]
101
+ # Third document raises an error:
102
+ # [ false, false, true, false, false ]
103
+ # ^^^^
104
+ # 2 % 5 == 2
105
+ # Fifth document raises an error:
106
+ # [ false, false, true, false, true ]
107
+ # ^^^^
108
+ # 4 % 5 == 4
109
+ # Sixth document raises an error:
110
+ # [ true, false, true, false, true ]
111
+ # ^^^^
112
+ # 5 % 5 == 0
113
+ #
114
+ # Eighth document is successful:
115
+ # [ true, false, false, false, true ]
116
+ # ^^^^^
117
+ # 7 % 5 == 2
118
+ # And so on.
119
+ @window_errors[@window_index] = is_error
99
120
  @window_index = (@window_index + 1) % @window_size
100
121
  end
101
122
 
@@ -43,6 +43,10 @@ module Utility
43
43
  configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
44
  configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
45
 
46
+ # headers
47
+ # these are necessary for cloud-hosted native connectors
48
+ configs[:headers] = es_config[:headers].to_h if es_config[:headers]
49
+
46
50
  # if log or trace is activated, we use the application logger
47
51
  configs[:logger] = if configs[:log] || configs[:trace]
48
52
  Utility::Logger.logger
@@ -17,6 +17,10 @@ module Utility
17
17
 
18
18
  filter.present? ? filter : {}
19
19
  end
20
+
21
+ def rule_pre_processing_active?(filter)
22
+ !filter.dig('advanced_snippet', 'value')&.present?
23
+ end
20
24
  end
21
25
  end
22
26
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.7.0.0.pre.20221117T004939Z
4
+ version: 8.7.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-17 00:00:00.000000000 Z
11
+ date: 2023-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.2.6
19
+ version: '5.2'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.2.6
26
+ version: '5.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: ecs-logging
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -104,8 +104,10 @@ files:
104
104
  - NOTICE.txt
105
105
  - lib/connectors/connector_status.rb
106
106
  - lib/connectors/crawler/scheduler.rb
107
+ - lib/connectors/job_trigger_method.rb
107
108
  - lib/connectors/sync_status.rb
108
109
  - lib/connectors_utility.rb
110
+ - lib/core/connector_job.rb
109
111
  - lib/core/connector_settings.rb
110
112
  - lib/core/elastic_connector_actions.rb
111
113
  - lib/core/filtering/validation_status.rb
@@ -130,9 +132,9 @@ homepage: https://github.com/elastic/connectors-ruby
130
132
  licenses:
131
133
  - Elastic-2.0
132
134
  metadata:
133
- revision: 294214a26b0fe9a4347763b01de681c336e8daae
134
- repository: https://github.com/elastic/connectors-ruby.git
135
- post_install_message:
135
+ revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
136
+ repository: https://github.com/elastic/connectors-ruby
137
+ post_install_message:
136
138
  rdoc_options: []
137
139
  require_paths:
138
140
  - lib
@@ -143,12 +145,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
143
145
  version: '0'
144
146
  required_rubygems_version: !ruby/object:Gem::Requirement
145
147
  requirements:
146
- - - ">"
148
+ - - ">="
147
149
  - !ruby/object:Gem::Version
148
- version: 1.3.1
150
+ version: '0'
149
151
  requirements: []
150
152
  rubygems_version: 3.0.3.1
151
- signing_key:
153
+ signing_key:
152
154
  specification_version: 4
153
155
  summary: Gem containing shared Connector Services libraries
154
156
  test_files: []