connectors_utility 8.6.0.4.pre.20221107T145613Z → 8.6.0.4.pre.20221114T235050Z

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3dc548bffdaf2be6ac65e4c9231196f2b542d3cff6a092e3756272a62312d4b
4
- data.tar.gz: fe6f7d03c9f2d1c56faeb501b7823c38abef582ce2edaade3a26d8c28dfad7ac
3
+ metadata.gz: b3b178cb45ca62a666074e0370d28f4b82477d2ee32e6e7a74b24c218f02c121
4
+ data.tar.gz: 0b99bd3126a5fdc7cad2ee2b9dc5862284992911043fc7c1931aaac9ea3844d8
5
5
  SHA512:
6
- metadata.gz: 87400cec0be6b368834cd0171dd8ca38df5b5b7928b0e10da7c24a29a89a06a89af22c5a4317f8a683135cabfd2871412b26b9da61924ad2356612a4219be7c1
7
- data.tar.gz: 249ed7dad12806e43c50b5c409085fcdb902f3e1e386bdfdf7db5d551dc0377d5a808cb8f4f49f9f01edebb548e7baebe1b8e9c249456a9efacb0496bf04d9c5
6
+ metadata.gz: 31b38cf34d989cb09c2ab61356f330a22224fe58f76b5a02db12da13ed3d3e6329244cc75b9eeb293312510df9ccb1e0ff615ec65c9dc0e832e8dbe2c5c25328
7
+ data.tar.gz: e9b2b753f2ac8135372303f750cb6630d364d0af802a7b5d980cb5ea470e57a3d23c949d2d60356e585da7c9d15383528982ecb80d1eaca2bb36f12854633950
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/registry'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -34,13 +35,15 @@ module Core
34
35
  new(es_response, connectors_meta)
35
36
  end
36
37
 
37
- def initialize(es_response, connectors_meta)
38
- @elasticsearch_response = es_response.with_indifferent_access
39
- @connectors_meta = connectors_meta.with_indifferent_access
40
- end
41
-
42
38
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
43
- query = { term: { is_native: true } }
39
+ query = {
40
+ bool: {
41
+ filter: [
42
+ { term: { is_native: true } },
43
+ { terms: { service_type: Connectors::REGISTRY.registered_connectors } }
44
+ ]
45
+ }
46
+ }
44
47
  fetch_connectors_by_query(query, page_size)
45
48
  end
46
49
 
@@ -83,23 +86,26 @@ module Core
83
86
  end
84
87
 
85
88
  def filtering
86
- Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
89
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
90
+ filtering = @elasticsearch_response.dig(:_source, :filtering)
91
+
92
+ Utility::Filtering.extract_filter(filtering)
87
93
  end
88
94
 
89
95
  def request_pipeline
90
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
96
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
91
97
  end
92
98
 
93
99
  def extract_binary_content?
94
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
100
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
95
101
  end
96
102
 
97
103
  def reduce_whitespace?
98
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
104
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
99
105
  end
100
106
 
101
107
  def run_ml_inference?
102
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
108
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
103
109
  end
104
110
 
105
111
  def formatted
@@ -116,6 +122,13 @@ module Core
116
122
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
117
123
  end
118
124
 
125
+ private
126
+
127
+ def initialize(es_response, connectors_meta)
128
+ @elasticsearch_response = es_response.with_indifferent_access
129
+ @connectors_meta = connectors_meta.with_indifferent_access
130
+ end
131
+
119
132
  def self.fetch_connectors_by_query(query, page_size)
120
133
  connectors_meta = ElasticConnectorActions.connectors_meta
121
134
 
@@ -19,6 +19,12 @@ module Core
19
19
  end
20
20
  end
21
21
 
22
+ class JobNotCreatedError < StandardError
23
+ def initialize(connector_id, response)
24
+ super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
25
+ end
26
+ end
27
+
22
28
  class ConnectorVersionChangedError < StandardError
23
29
  def initialize(connector_id, seq_no, primary_term)
24
30
  super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
@@ -43,10 +49,17 @@ module Core
43
49
  end
44
50
 
45
51
  def get_connector(connector_id)
52
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
46
53
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
47
54
  end
48
55
 
56
+ def get_job(job_id)
57
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
58
+ client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
59
+ end
60
+
49
61
  def connectors_meta
62
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
50
63
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
51
64
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
52
65
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -65,6 +78,19 @@ module Core
65
78
  )
66
79
  end
67
80
 
81
+ def search_jobs(query, page_size, offset)
82
+ client.search(
83
+ :index => Utility::Constants::JOB_INDEX,
84
+ :ignore => 404,
85
+ :body => {
86
+ :size => page_size,
87
+ :from => offset,
88
+ :query => query,
89
+ :sort => ['created_at']
90
+ }
91
+ )
92
+ end
93
+
68
94
  def update_connector_configuration(connector_id, configuration)
69
95
  update_connector_fields(connector_id, :configuration => configuration)
70
96
  end
@@ -84,6 +110,28 @@ module Core
84
110
  update_connector_configuration(connector_id, payload)
85
111
  end
86
112
 
113
+ def update_filtering_validation(connector_id, filter_validation_results)
114
+ return if filter_validation_results.empty?
115
+
116
+ filtering = get_connector(connector_id).dig(:_source, :filtering)
117
+
118
+ case filtering
119
+ when Hash
120
+ update_filter_validation(filtering, filter_validation_results)
121
+ when Array
122
+ return unless should_update_validations?(filter_validation_results, filtering)
123
+
124
+ filtering.each do |filter|
125
+ update_filter_validation(filter, filter_validation_results)
126
+ end
127
+ else
128
+ Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
129
+ return
130
+ end
131
+
132
+ update_connector_fields(connector_id, { :filtering => filtering })
133
+ end
134
+
87
135
  def claim_job(connector_id)
88
136
  seq_no = nil
89
137
  primary_term = nil
@@ -111,24 +159,38 @@ module Core
111
159
  )
112
160
 
113
161
  body = {
114
- :connector_id => connector_id,
115
162
  :status => Connectors::SyncStatus::IN_PROGRESS,
116
163
  :worker_hostname => Socket.gethostname,
117
164
  :created_at => Time.now,
118
- :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
165
+ :started_at => Time.now,
166
+ :last_seen => Time.now,
167
+ :connector => {
168
+ :id => connector_id,
169
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
170
+ }
119
171
  }
120
172
 
121
- client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
173
+ index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
+ if index_response['result'] == 'created'
175
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
+ return client.get(
177
+ :index => Utility::Constants::JOB_INDEX,
178
+ :id => index_response['_id'],
179
+ :ignore => 404
180
+ ).with_indifferent_access
181
+ end
182
+ raise JobNotCreatedError.new(connector_id, index_response)
122
183
  end
123
184
 
124
185
  def convert_connector_filtering_to_job_filtering(connector_filtering)
125
186
  return [] unless connector_filtering
126
187
  connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
188
  connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
189
+ snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
128
190
  job_filtering << {
129
191
  'domain' => filtering_domain['domain'],
130
192
  'rules' => filtering_domain.dig('active', 'rules'),
131
- 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
193
+ 'advanced_snippet' => snippet['value'] || snippet,
132
194
  'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
195
  }
134
196
  end
@@ -145,22 +207,33 @@ module Core
145
207
  update_connector_fields(connector_id, body)
146
208
  end
147
209
 
148
- def complete_sync(connector_id, job_id, status)
149
- sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
210
+ def update_sync(job_id, metadata)
211
+ body = {
212
+ :doc => { :last_seen => Time.now }.merge(metadata)
213
+ }
214
+ client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
215
+ end
216
+
217
+ def complete_sync(connector_id, job_id, metadata, error)
218
+ sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
219
+
220
+ metadata ||= {}
150
221
 
151
222
  update_connector_fields(connector_id,
152
223
  :last_sync_status => sync_status,
153
- :last_sync_error => status[:error],
154
- :error => status[:error],
224
+ :last_sync_error => error,
225
+ :error => error,
155
226
  :last_synced => Time.now,
156
- :last_indexed_document_count => status[:indexed_document_count],
157
- :last_deleted_document_count => status[:deleted_document_count])
227
+ :last_indexed_document_count => metadata[:indexed_document_count],
228
+ :last_deleted_document_count => metadata[:deleted_document_count])
158
229
 
159
230
  body = {
160
231
  :doc => {
161
232
  :status => sync_status,
162
- :completed_at => Time.now
163
- }.merge(status)
233
+ :completed_at => Time.now,
234
+ :last_seen => Time.now,
235
+ :error => error
236
+ }.merge(metadata)
164
237
  }
165
238
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
166
239
  end
@@ -248,12 +321,105 @@ module Core
248
321
  :properties => {
249
322
  :api_key_id => { :type => :keyword },
250
323
  :configuration => { :type => :object },
251
- :error => { :type => :text },
324
+ :description => { :type => :text },
325
+ :error => { :type => :keyword },
326
+ :features => {
327
+ :properties => {
328
+ :filtering_advanced_config => { :type => :boolean },
329
+ :filtering_rules => { :type => :boolean }
330
+ }
331
+ },
332
+ :filtering => {
333
+ :properties => {
334
+ :domain => { :type => :keyword },
335
+ :active => {
336
+ :properties => {
337
+ :rules => {
338
+ :properties => {
339
+ :id => { :type => :keyword },
340
+ :policy => { :type => :keyword },
341
+ :field => { :type => :keyword },
342
+ :rule => { :type => :keyword },
343
+ :value => { :type => :keyword },
344
+ :order => { :type => :short },
345
+ :created_at => { :type => :date },
346
+ :updated_at => { :type => :date }
347
+ }
348
+ },
349
+ :advanced_snippet => {
350
+ :properties => {
351
+ :value => { :type => :object },
352
+ :created_at => { :type => :date },
353
+ :updated_at => { :type => :date }
354
+ }
355
+ },
356
+ :validation => {
357
+ :properties => {
358
+ :state => { :type => :keyword },
359
+ :errors => {
360
+ :properties => {
361
+ :ids => { :type => :keyword },
362
+ :messages => { :type => :text }
363
+ }
364
+ }
365
+ }
366
+ }
367
+ }
368
+ },
369
+ :draft => {
370
+ :properties => {
371
+ :rules => {
372
+ :properties => {
373
+ :id => { :type => :keyword },
374
+ :policy => { :type => :keyword },
375
+ :field => { :type => :keyword },
376
+ :rule => { :type => :keyword },
377
+ :value => { :type => :keyword },
378
+ :order => { :type => :short },
379
+ :created_at => { :type => :date },
380
+ :updated_at => { :type => :date }
381
+ }
382
+ },
383
+ :advanced_snippet => {
384
+ :properties => {
385
+ :value => { :type => :object },
386
+ :created_at => { :type => :date },
387
+ :updated_at => { :type => :date }
388
+ }
389
+ },
390
+ :validation => {
391
+ :properties => {
392
+ :state => { :type => :keyword },
393
+ :errors => {
394
+ :properties => {
395
+ :ids => { :type => :keyword },
396
+ :messages => { :type => :text }
397
+ }
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+ },
252
405
  :index_name => { :type => :keyword },
406
+ :is_native => { :type => :boolean },
407
+ :language => { :type => :keyword },
253
408
  :last_seen => { :type => :date },
409
+ :last_sync_error => { :type => :keyword },
410
+ :last_sync_status => { :type => :keyword },
254
411
  :last_synced => { :type => :date },
255
- :last_indexed_document_count => { :type => :integer },
256
- :last_deleted_document_count => { :type => :integer },
412
+ :last_deleted_document_count => { :type => :long },
413
+ :last_indexed_document_count => { :type => :long },
414
+ :name => { :type => :keyword },
415
+ :pipeline => {
416
+ :properties => {
417
+ :extract_binary_content => { :type => :boolean },
418
+ :name => { :type => :keyword },
419
+ :reduce_whitespace => { :type => :boolean },
420
+ :run_ml_inference => { :type => :boolean }
421
+ }
422
+ },
257
423
  :scheduling => {
258
424
  :properties => {
259
425
  :enabled => { :type => :boolean },
@@ -262,9 +428,7 @@ module Core
262
428
  },
263
429
  :service_type => { :type => :keyword },
264
430
  :status => { :type => :keyword },
265
- :sync_error => { :type => :text },
266
- :sync_now => { :type => :boolean },
267
- :sync_status => { :type => :keyword }
431
+ :sync_now => { :type => :boolean }
268
432
  }
269
433
  }
270
434
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -275,14 +439,68 @@ module Core
275
439
  def ensure_job_index_exists
276
440
  mappings = {
277
441
  :properties => {
278
- :connector_id => { :type => :keyword },
279
- :status => { :type => :keyword },
442
+ :cancelation_requested_at => { :type => :date },
443
+ :canceled_at => { :type => :date },
444
+ :completed_at => { :type => :date },
445
+ :connector => {
446
+ :properties => {
447
+ :configuration => { :type => :object },
448
+ :filtering => {
449
+ :properties => {
450
+ :domain => { :type => :keyword },
451
+ :rules => {
452
+ :properties => {
453
+ :id => { :type => :keyword },
454
+ :policy => { :type => :keyword },
455
+ :field => { :type => :keyword },
456
+ :rule => { :type => :keyword },
457
+ :value => { :type => :keyword },
458
+ :order => { :type => :short },
459
+ :created_at => { :type => :date },
460
+ :updated_at => { :type => :date }
461
+ }
462
+ },
463
+ :advanced_snippet => {
464
+ :properties => {
465
+ :value => { :type => :object },
466
+ :created_at => { :type => :date },
467
+ :updated_at => { :type => :date }
468
+ }
469
+ },
470
+ :warnings => {
471
+ :properties => {
472
+ :ids => { :type => :keyword },
473
+ :messages => { :type => :text }
474
+ }
475
+ }
476
+ }
477
+ },
478
+ :id => { :type => :keyword },
479
+ :index_name => { :type => :keyword },
480
+ :language => { :type => :keyword },
481
+ :pipeline => {
482
+ :properties => {
483
+ :extract_binary_content => { :type => :boolean },
484
+ :name => { :type => :keyword },
485
+ :reduce_whitespace => { :type => :boolean },
486
+ :run_ml_inference => { :type => :boolean }
487
+ }
488
+ },
489
+ :service_type => { :type => :keyword }
490
+ }
491
+ },
492
+ :created_at => { :type => :date },
493
+ :deleted_document_count => { :type => :integer },
280
494
  :error => { :type => :text },
281
- :worker_hostname => { :type => :keyword },
282
495
  :indexed_document_count => { :type => :integer },
283
- :deleted_document_count => { :type => :integer },
284
- :created_at => { :type => :date },
285
- :completed_at => { :type => :date }
496
+ :indexed_document_volume => { :type => :integer },
497
+ :last_seen => { :type => :date },
498
+ :metadata => { :type => :object },
499
+ :started_at => { :type => :date },
500
+ :status => { :type => :keyword },
501
+ :total_document_count => { :type => :integer },
502
+ :trigger_method => { :type => :keyword },
503
+ :worker_hostname => { :type => :keyword }
286
504
  }
287
505
  }
288
506
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
@@ -313,8 +531,20 @@ module Core
313
531
  end
314
532
  end
315
533
 
534
+ def document_count(index_name)
535
+ client.count(:index => index_name)['count']
536
+ end
537
+
316
538
  private
317
539
 
540
+ def should_update_validations?(domain_validations, filtering)
541
+ domains_present = filtering.collect { |filter| filter[:domain] }
542
+ domains_to_update = domain_validations.keys
543
+
544
+ # non-empty intersection -> domains to update present
545
+ !(domains_present & domains_to_update).empty?
546
+ end
547
+
318
548
  def client
319
549
  @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
320
550
  end
@@ -324,6 +554,15 @@ module Core
324
554
  index_version = index_versions.max # gets the largest suffix number
325
555
  "#{alias_name}-v#{index_version}"
326
556
  end
557
+
558
+ def update_filter_validation(filter, domain_validations)
559
+ domain = filter[:domain]
560
+
561
+ if domain_validations.key?(domain)
562
+ new_validation_state = { :draft => { :validation => domain_validations[domain] } }
563
+ filter.deep_merge!(new_validation_state)
564
+ end
565
+ end
327
566
  end
328
567
  end
329
568
  end
@@ -10,6 +10,7 @@ require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering/validation_status'
13
14
  require 'utility/cron'
14
15
  require 'utility/logger'
15
16
  require 'utility/exception_tracking'
@@ -38,15 +39,18 @@ module Core
38
39
  if configuration_triggered?(cs)
39
40
  yield cs, :configuration
40
41
  end
41
- end
42
- if @is_shutting_down
43
- break
42
+ if filtering_validation_triggered?(cs)
43
+ yield cs, :filter_validation
44
+ end
44
45
  end
45
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
46
47
  Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
48
  rescue StandardError => e
48
49
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
50
  ensure
51
+ if @is_shutting_down
52
+ break
53
+ end
50
54
  if @poll_interval > 0 && !@is_shutting_down
51
55
  Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
52
56
  sleep(@poll_interval)
@@ -62,8 +66,6 @@ module Core
62
66
  private
63
67
 
64
68
  def sync_triggered?(connector_settings)
65
- return false unless connector_registered?(connector_settings.service_type)
66
-
67
69
  unless connector_settings.valid_index_name?
68
70
  Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
69
71
  return false
@@ -129,8 +131,6 @@ module Core
129
131
  end
130
132
 
131
133
  def heartbeat_triggered?(connector_settings)
132
- return false unless connector_registered?(connector_settings.service_type)
133
-
134
134
  last_seen = connector_settings[:last_seen]
135
135
  return true if last_seen.nil? || last_seen.empty?
136
136
  last_seen = begin
@@ -144,11 +144,41 @@ module Core
144
144
  end
145
145
 
146
146
  def configuration_triggered?(connector_settings)
147
- if connector_settings.needs_service_type? || connector_registered?(connector_settings.service_type)
148
- return connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
147
+ connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
148
+ end
149
+
150
+ def filtering_validation_triggered?(connector_settings)
151
+ filtering = connector_settings.filtering
152
+
153
+ unless filtering.present?
154
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
155
+
156
+ return false
149
157
  end
150
158
 
151
- false
159
+ draft_filters = filtering[:draft]
160
+
161
+ unless draft_filters.present?
162
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
163
+
164
+ return false
165
+ end
166
+
167
+ validation = draft_filters[:validation]
168
+
169
+ unless validation.present?
170
+ Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
171
+
172
+ return false
173
+ end
174
+
175
+ unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
176
+ Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
177
+
178
+ return false
179
+ end
180
+
181
+ true
152
182
  end
153
183
 
154
184
  def connector_registered?(service_type)
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
+ module Utility
10
+ class BulkQueue
11
+ class QueueOverflowError < StandardError; end
12
+
13
+ # 500 items or 5MB
14
+ def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
15
+ @operation_count_threshold = operation_count_threshold.freeze
16
+ @size_threshold = size_threshold.freeze
17
+
18
+ @buffer = ''
19
+
20
+ @current_operation_count = 0
21
+
22
+ @current_buffer_size = 0
23
+ @current_data_size = 0
24
+ end
25
+
26
+ def pop_all
27
+ result = @buffer
28
+
29
+ reset
30
+
31
+ result
32
+ end
33
+
34
+ def add(operation, payload = nil)
35
+ raise QueueOverflowError unless will_fit?(operation, payload)
36
+
37
+ operation_size = get_size(operation)
38
+ payload_size = get_size(payload)
39
+
40
+ @current_operation_count += 1
41
+ @current_buffer_size += operation_size
42
+ @current_buffer_size += payload_size
43
+ @current_data_size += payload_size
44
+
45
+ @buffer << operation
46
+ @buffer << "\n"
47
+
48
+ if payload
49
+ @buffer << payload
50
+ @buffer << "\n"
51
+ end
52
+ end
53
+
54
+ def will_fit?(operation, payload = nil)
55
+ return false if @current_operation_count + 1 > @operation_count_threshold
56
+
57
+ operation_size = get_size(operation)
58
+ payload_size = get_size(payload)
59
+
60
+ @current_buffer_size + operation_size + payload_size < @size_threshold
61
+ end
62
+
63
+ def current_stats
64
+ {
65
+ :current_operation_count => @current_operation_count,
66
+ :current_buffer_size => @current_buffer_size
67
+ }
68
+ end
69
+
70
+ private
71
+
72
+ def get_size(str)
73
+ return 0 unless str
74
+ str.bytesize
75
+ end
76
+
77
+ def reset
78
+ @current_operation_count = 0
79
+ @current_buffer_size = 0
80
+ @current_data_size = 0
81
+
82
+ @buffer = ''
83
+ end
84
+ end
85
+ end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end
@@ -4,6 +4,7 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'config'
7
8
  require 'logger'
8
9
  require 'active_support/core_ext/module'
9
10
  require 'active_support/core_ext/string/filters'
@@ -23,7 +24,7 @@ module Utility
23
24
  end
24
25
 
25
26
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
28
  end
28
29
 
29
30
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,15 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'utility/bulk_queue'
8
+ require 'utility/common'
7
9
  require 'utility/constants'
8
10
  require 'utility/cron'
9
- require 'utility/common'
11
+ require 'utility/elasticsearch/index/mappings'
12
+ require 'utility/elasticsearch/index/text_analysis_settings'
13
+ require 'utility/environment'
10
14
  require 'utility/errors'
11
15
  require 'utility/es_client'
12
- require 'utility/environment'
13
16
  require 'utility/exception_tracking'
14
17
  require 'utility/extension_mapping_util'
15
18
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221107T145613Z
4
+ version: 8.6.0.4.pre.20221114T235050Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-07 00:00:00.000000000 Z
11
+ date: 2022-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -110,6 +110,7 @@ files:
110
110
  - lib/core/elastic_connector_actions.rb
111
111
  - lib/core/scheduler.rb
112
112
  - lib/utility.rb
113
+ - lib/utility/bulk_queue.rb
113
114
  - lib/utility/common.rb
114
115
  - lib/utility/constants.rb
115
116
  - lib/utility/cron.rb
@@ -126,7 +127,7 @@ homepage: https://github.com/elastic/connectors-ruby
126
127
  licenses:
127
128
  - Elastic-2.0
128
129
  metadata:
129
- revision: aa0e7e495d6f49de2bc05991b793b18b0c1ca78d
130
+ revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
130
131
  repository: git@github.com:elastic/ent-search-connectors.git
131
132
  post_install_message:
132
133
  rdoc_options: []