connectors_utility 8.6.0.4.pre.20221107T145613Z → 8.6.0.4.pre.20221114T235050Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/scheduler.rb +40 -10
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3b178cb45ca62a666074e0370d28f4b82477d2ee32e6e7a74b24c218f02c121
|
4
|
+
data.tar.gz: 0b99bd3126a5fdc7cad2ee2b9dc5862284992911043fc7c1931aaac9ea3844d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31b38cf34d989cb09c2ab61356f330a22224fe58f76b5a02db12da13ed3d3e6329244cc75b9eeb293312510df9ccb1e0ff615ec65c9dc0e832e8dbe2c5c25328
|
7
|
+
data.tar.gz: e9b2b753f2ac8135372303f750cb6630d364d0af802a7b5d980cb5ea470e57a3d23c949d2d60356e585da7c9d15383528982ecb80d1eaca2bb36f12854633950
|
data/lib/core/connector_settings.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/registry'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -34,13 +35,15 @@ module Core
|
|
34
35
|
new(es_response, connectors_meta)
|
35
36
|
end
|
36
37
|
|
37
|
-
def initialize(es_response, connectors_meta)
|
38
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
39
|
-
@connectors_meta = connectors_meta.with_indifferent_access
|
40
|
-
end
|
41
|
-
|
42
38
|
def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
|
43
|
-
query = {
|
39
|
+
query = {
|
40
|
+
bool: {
|
41
|
+
filter: [
|
42
|
+
{ term: { is_native: true } },
|
43
|
+
{ terms: { service_type: Connectors::REGISTRY.registered_connectors } }
|
44
|
+
]
|
45
|
+
}
|
46
|
+
}
|
44
47
|
fetch_connectors_by_query(query, page_size)
|
45
48
|
end
|
46
49
|
|
@@ -83,23 +86,26 @@ module Core
|
|
83
86
|
end
|
84
87
|
|
85
88
|
def filtering
|
86
|
-
|
89
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
90
|
+
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
91
|
+
|
92
|
+
Utility::Filtering.extract_filter(filtering)
|
87
93
|
end
|
88
94
|
|
89
95
|
def request_pipeline
|
90
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
96
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
91
97
|
end
|
92
98
|
|
93
99
|
def extract_binary_content?
|
94
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
100
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
95
101
|
end
|
96
102
|
|
97
103
|
def reduce_whitespace?
|
98
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
104
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
99
105
|
end
|
100
106
|
|
101
107
|
def run_ml_inference?
|
102
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
108
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
103
109
|
end
|
104
110
|
|
105
111
|
def formatted
|
@@ -116,6 +122,13 @@ module Core
|
|
116
122
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
117
123
|
end
|
118
124
|
|
125
|
+
private
|
126
|
+
|
127
|
+
def initialize(es_response, connectors_meta)
|
128
|
+
@elasticsearch_response = es_response.with_indifferent_access
|
129
|
+
@connectors_meta = connectors_meta.with_indifferent_access
|
130
|
+
end
|
131
|
+
|
119
132
|
def self.fetch_connectors_by_query(query, page_size)
|
120
133
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
121
134
|
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -19,6 +19,12 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class JobNotCreatedError < StandardError
|
23
|
+
def initialize(connector_id, response)
|
24
|
+
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
class ConnectorVersionChangedError < StandardError
|
23
29
|
def initialize(connector_id, seq_no, primary_term)
|
24
30
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -43,10 +49,17 @@ module Core
|
|
43
49
|
end
|
44
50
|
|
45
51
|
def get_connector(connector_id)
|
52
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
46
53
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
47
54
|
end
|
48
55
|
|
56
|
+
def get_job(job_id)
|
57
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
+
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
+
end
|
60
|
+
|
49
61
|
def connectors_meta
|
62
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
50
63
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
51
64
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
52
65
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -65,6 +78,19 @@ module Core
|
|
65
78
|
)
|
66
79
|
end
|
67
80
|
|
81
|
+
def search_jobs(query, page_size, offset)
|
82
|
+
client.search(
|
83
|
+
:index => Utility::Constants::JOB_INDEX,
|
84
|
+
:ignore => 404,
|
85
|
+
:body => {
|
86
|
+
:size => page_size,
|
87
|
+
:from => offset,
|
88
|
+
:query => query,
|
89
|
+
:sort => ['created_at']
|
90
|
+
}
|
91
|
+
)
|
92
|
+
end
|
93
|
+
|
68
94
|
def update_connector_configuration(connector_id, configuration)
|
69
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
70
96
|
end
|
@@ -84,6 +110,28 @@ module Core
|
|
84
110
|
update_connector_configuration(connector_id, payload)
|
85
111
|
end
|
86
112
|
|
113
|
+
def update_filtering_validation(connector_id, filter_validation_results)
|
114
|
+
return if filter_validation_results.empty?
|
115
|
+
|
116
|
+
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
+
|
118
|
+
case filtering
|
119
|
+
when Hash
|
120
|
+
update_filter_validation(filtering, filter_validation_results)
|
121
|
+
when Array
|
122
|
+
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
+
|
124
|
+
filtering.each do |filter|
|
125
|
+
update_filter_validation(filter, filter_validation_results)
|
126
|
+
end
|
127
|
+
else
|
128
|
+
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
+
end
|
134
|
+
|
87
135
|
def claim_job(connector_id)
|
88
136
|
seq_no = nil
|
89
137
|
primary_term = nil
|
@@ -111,24 +159,38 @@ module Core
|
|
111
159
|
)
|
112
160
|
|
113
161
|
body = {
|
114
|
-
:connector_id => connector_id,
|
115
162
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
116
163
|
:worker_hostname => Socket.gethostname,
|
117
164
|
:created_at => Time.now,
|
118
|
-
:
|
165
|
+
:started_at => Time.now,
|
166
|
+
:last_seen => Time.now,
|
167
|
+
:connector => {
|
168
|
+
:id => connector_id,
|
169
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
170
|
+
}
|
119
171
|
}
|
120
172
|
|
121
|
-
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
173
|
+
index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
|
174
|
+
if index_response['result'] == 'created'
|
175
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
176
|
+
return client.get(
|
177
|
+
:index => Utility::Constants::JOB_INDEX,
|
178
|
+
:id => index_response['_id'],
|
179
|
+
:ignore => 404
|
180
|
+
).with_indifferent_access
|
181
|
+
end
|
182
|
+
raise JobNotCreatedError.new(connector_id, index_response)
|
122
183
|
end
|
123
184
|
|
124
185
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
186
|
return [] unless connector_filtering
|
126
187
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
188
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
189
|
+
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
128
190
|
job_filtering << {
|
129
191
|
'domain' => filtering_domain['domain'],
|
130
192
|
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
-
'advanced_snippet' =>
|
193
|
+
'advanced_snippet' => snippet['value'] || snippet,
|
132
194
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
195
|
}
|
134
196
|
end
|
@@ -145,22 +207,33 @@ module Core
|
|
145
207
|
update_connector_fields(connector_id, body)
|
146
208
|
end
|
147
209
|
|
148
|
-
def
|
149
|
-
|
210
|
+
def update_sync(job_id, metadata)
|
211
|
+
body = {
|
212
|
+
:doc => { :last_seen => Time.now }.merge(metadata)
|
213
|
+
}
|
214
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
215
|
+
end
|
216
|
+
|
217
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
218
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
219
|
+
|
220
|
+
metadata ||= {}
|
150
221
|
|
151
222
|
update_connector_fields(connector_id,
|
152
223
|
:last_sync_status => sync_status,
|
153
|
-
:last_sync_error =>
|
154
|
-
:error =>
|
224
|
+
:last_sync_error => error,
|
225
|
+
:error => error,
|
155
226
|
:last_synced => Time.now,
|
156
|
-
:last_indexed_document_count =>
|
157
|
-
:last_deleted_document_count =>
|
227
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
228
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
158
229
|
|
159
230
|
body = {
|
160
231
|
:doc => {
|
161
232
|
:status => sync_status,
|
162
|
-
:completed_at => Time.now
|
163
|
-
|
233
|
+
:completed_at => Time.now,
|
234
|
+
:last_seen => Time.now,
|
235
|
+
:error => error
|
236
|
+
}.merge(metadata)
|
164
237
|
}
|
165
238
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
166
239
|
end
|
@@ -248,12 +321,105 @@ module Core
|
|
248
321
|
:properties => {
|
249
322
|
:api_key_id => { :type => :keyword },
|
250
323
|
:configuration => { :type => :object },
|
251
|
-
:
|
324
|
+
:description => { :type => :text },
|
325
|
+
:error => { :type => :keyword },
|
326
|
+
:features => {
|
327
|
+
:properties => {
|
328
|
+
:filtering_advanced_config => { :type => :boolean },
|
329
|
+
:filtering_rules => { :type => :boolean }
|
330
|
+
}
|
331
|
+
},
|
332
|
+
:filtering => {
|
333
|
+
:properties => {
|
334
|
+
:domain => { :type => :keyword },
|
335
|
+
:active => {
|
336
|
+
:properties => {
|
337
|
+
:rules => {
|
338
|
+
:properties => {
|
339
|
+
:id => { :type => :keyword },
|
340
|
+
:policy => { :type => :keyword },
|
341
|
+
:field => { :type => :keyword },
|
342
|
+
:rule => { :type => :keyword },
|
343
|
+
:value => { :type => :keyword },
|
344
|
+
:order => { :type => :short },
|
345
|
+
:created_at => { :type => :date },
|
346
|
+
:updated_at => { :type => :date }
|
347
|
+
}
|
348
|
+
},
|
349
|
+
:advanced_snippet => {
|
350
|
+
:properties => {
|
351
|
+
:value => { :type => :object },
|
352
|
+
:created_at => { :type => :date },
|
353
|
+
:updated_at => { :type => :date }
|
354
|
+
}
|
355
|
+
},
|
356
|
+
:validation => {
|
357
|
+
:properties => {
|
358
|
+
:state => { :type => :keyword },
|
359
|
+
:errors => {
|
360
|
+
:properties => {
|
361
|
+
:ids => { :type => :keyword },
|
362
|
+
:messages => { :type => :text }
|
363
|
+
}
|
364
|
+
}
|
365
|
+
}
|
366
|
+
}
|
367
|
+
}
|
368
|
+
},
|
369
|
+
:draft => {
|
370
|
+
:properties => {
|
371
|
+
:rules => {
|
372
|
+
:properties => {
|
373
|
+
:id => { :type => :keyword },
|
374
|
+
:policy => { :type => :keyword },
|
375
|
+
:field => { :type => :keyword },
|
376
|
+
:rule => { :type => :keyword },
|
377
|
+
:value => { :type => :keyword },
|
378
|
+
:order => { :type => :short },
|
379
|
+
:created_at => { :type => :date },
|
380
|
+
:updated_at => { :type => :date }
|
381
|
+
}
|
382
|
+
},
|
383
|
+
:advanced_snippet => {
|
384
|
+
:properties => {
|
385
|
+
:value => { :type => :object },
|
386
|
+
:created_at => { :type => :date },
|
387
|
+
:updated_at => { :type => :date }
|
388
|
+
}
|
389
|
+
},
|
390
|
+
:validation => {
|
391
|
+
:properties => {
|
392
|
+
:state => { :type => :keyword },
|
393
|
+
:errors => {
|
394
|
+
:properties => {
|
395
|
+
:ids => { :type => :keyword },
|
396
|
+
:messages => { :type => :text }
|
397
|
+
}
|
398
|
+
}
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
}
|
403
|
+
}
|
404
|
+
},
|
252
405
|
:index_name => { :type => :keyword },
|
406
|
+
:is_native => { :type => :boolean },
|
407
|
+
:language => { :type => :keyword },
|
253
408
|
:last_seen => { :type => :date },
|
409
|
+
:last_sync_error => { :type => :keyword },
|
410
|
+
:last_sync_status => { :type => :keyword },
|
254
411
|
:last_synced => { :type => :date },
|
255
|
-
:
|
256
|
-
:
|
412
|
+
:last_deleted_document_count => { :type => :long },
|
413
|
+
:last_indexed_document_count => { :type => :long },
|
414
|
+
:name => { :type => :keyword },
|
415
|
+
:pipeline => {
|
416
|
+
:properties => {
|
417
|
+
:extract_binary_content => { :type => :boolean },
|
418
|
+
:name => { :type => :keyword },
|
419
|
+
:reduce_whitespace => { :type => :boolean },
|
420
|
+
:run_ml_inference => { :type => :boolean }
|
421
|
+
}
|
422
|
+
},
|
257
423
|
:scheduling => {
|
258
424
|
:properties => {
|
259
425
|
:enabled => { :type => :boolean },
|
@@ -262,9 +428,7 @@ module Core
|
|
262
428
|
},
|
263
429
|
:service_type => { :type => :keyword },
|
264
430
|
:status => { :type => :keyword },
|
265
|
-
:
|
266
|
-
:sync_now => { :type => :boolean },
|
267
|
-
:sync_status => { :type => :keyword }
|
431
|
+
:sync_now => { :type => :boolean }
|
268
432
|
}
|
269
433
|
}
|
270
434
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -275,14 +439,68 @@ module Core
|
|
275
439
|
def ensure_job_index_exists
|
276
440
|
mappings = {
|
277
441
|
:properties => {
|
278
|
-
:
|
279
|
-
:
|
442
|
+
:cancelation_requested_at => { :type => :date },
|
443
|
+
:canceled_at => { :type => :date },
|
444
|
+
:completed_at => { :type => :date },
|
445
|
+
:connector => {
|
446
|
+
:properties => {
|
447
|
+
:configuration => { :type => :object },
|
448
|
+
:filtering => {
|
449
|
+
:properties => {
|
450
|
+
:domain => { :type => :keyword },
|
451
|
+
:rules => {
|
452
|
+
:properties => {
|
453
|
+
:id => { :type => :keyword },
|
454
|
+
:policy => { :type => :keyword },
|
455
|
+
:field => { :type => :keyword },
|
456
|
+
:rule => { :type => :keyword },
|
457
|
+
:value => { :type => :keyword },
|
458
|
+
:order => { :type => :short },
|
459
|
+
:created_at => { :type => :date },
|
460
|
+
:updated_at => { :type => :date }
|
461
|
+
}
|
462
|
+
},
|
463
|
+
:advanced_snippet => {
|
464
|
+
:properties => {
|
465
|
+
:value => { :type => :object },
|
466
|
+
:created_at => { :type => :date },
|
467
|
+
:updated_at => { :type => :date }
|
468
|
+
}
|
469
|
+
},
|
470
|
+
:warnings => {
|
471
|
+
:properties => {
|
472
|
+
:ids => { :type => :keyword },
|
473
|
+
:messages => { :type => :text }
|
474
|
+
}
|
475
|
+
}
|
476
|
+
}
|
477
|
+
},
|
478
|
+
:id => { :type => :keyword },
|
479
|
+
:index_name => { :type => :keyword },
|
480
|
+
:language => { :type => :keyword },
|
481
|
+
:pipeline => {
|
482
|
+
:properties => {
|
483
|
+
:extract_binary_content => { :type => :boolean },
|
484
|
+
:name => { :type => :keyword },
|
485
|
+
:reduce_whitespace => { :type => :boolean },
|
486
|
+
:run_ml_inference => { :type => :boolean }
|
487
|
+
}
|
488
|
+
},
|
489
|
+
:service_type => { :type => :keyword }
|
490
|
+
}
|
491
|
+
},
|
492
|
+
:created_at => { :type => :date },
|
493
|
+
:deleted_document_count => { :type => :integer },
|
280
494
|
:error => { :type => :text },
|
281
|
-
:worker_hostname => { :type => :keyword },
|
282
495
|
:indexed_document_count => { :type => :integer },
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
496
|
+
:indexed_document_volume => { :type => :integer },
|
497
|
+
:last_seen => { :type => :date },
|
498
|
+
:metadata => { :type => :object },
|
499
|
+
:started_at => { :type => :date },
|
500
|
+
:status => { :type => :keyword },
|
501
|
+
:total_document_count => { :type => :integer },
|
502
|
+
:trigger_method => { :type => :keyword },
|
503
|
+
:worker_hostname => { :type => :keyword }
|
286
504
|
}
|
287
505
|
}
|
288
506
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
@@ -313,8 +531,20 @@ module Core
|
|
313
531
|
end
|
314
532
|
end
|
315
533
|
|
534
|
+
def document_count(index_name)
|
535
|
+
client.count(:index => index_name)['count']
|
536
|
+
end
|
537
|
+
|
316
538
|
private
|
317
539
|
|
540
|
+
def should_update_validations?(domain_validations, filtering)
|
541
|
+
domains_present = filtering.collect { |filter| filter[:domain] }
|
542
|
+
domains_to_update = domain_validations.keys
|
543
|
+
|
544
|
+
# non-empty intersection -> domains to update present
|
545
|
+
!(domains_present & domains_to_update).empty?
|
546
|
+
end
|
547
|
+
|
318
548
|
def client
|
319
549
|
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
550
|
end
|
@@ -324,6 +554,15 @@ module Core
|
|
324
554
|
index_version = index_versions.max # gets the largest suffix number
|
325
555
|
"#{alias_name}-v#{index_version}"
|
326
556
|
end
|
557
|
+
|
558
|
+
def update_filter_validation(filter, domain_validations)
|
559
|
+
domain = filter[:domain]
|
560
|
+
|
561
|
+
if domain_validations.key?(domain)
|
562
|
+
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
563
|
+
filter.deep_merge!(new_validation_state)
|
564
|
+
end
|
565
|
+
end
|
327
566
|
end
|
328
567
|
end
|
329
568
|
end
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
def configuration_triggered?(connector_settings)
|
147
|
-
|
148
|
-
|
147
|
+
connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
|
148
|
+
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
149
157
|
end
|
150
158
|
|
151
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
152
182
|
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
data/lib/utility/bulk_queue.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
class BulkQueue
|
11
|
+
class QueueOverflowError < StandardError; end
|
12
|
+
|
13
|
+
# 500 items or 5MB
|
14
|
+
def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
|
15
|
+
@operation_count_threshold = operation_count_threshold.freeze
|
16
|
+
@size_threshold = size_threshold.freeze
|
17
|
+
|
18
|
+
@buffer = ''
|
19
|
+
|
20
|
+
@current_operation_count = 0
|
21
|
+
|
22
|
+
@current_buffer_size = 0
|
23
|
+
@current_data_size = 0
|
24
|
+
end
|
25
|
+
|
26
|
+
def pop_all
|
27
|
+
result = @buffer
|
28
|
+
|
29
|
+
reset
|
30
|
+
|
31
|
+
result
|
32
|
+
end
|
33
|
+
|
34
|
+
def add(operation, payload = nil)
|
35
|
+
raise QueueOverflowError unless will_fit?(operation, payload)
|
36
|
+
|
37
|
+
operation_size = get_size(operation)
|
38
|
+
payload_size = get_size(payload)
|
39
|
+
|
40
|
+
@current_operation_count += 1
|
41
|
+
@current_buffer_size += operation_size
|
42
|
+
@current_buffer_size += payload_size
|
43
|
+
@current_data_size += payload_size
|
44
|
+
|
45
|
+
@buffer << operation
|
46
|
+
@buffer << "\n"
|
47
|
+
|
48
|
+
if payload
|
49
|
+
@buffer << payload
|
50
|
+
@buffer << "\n"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def will_fit?(operation, payload = nil)
|
55
|
+
return false if @current_operation_count + 1 > @operation_count_threshold
|
56
|
+
|
57
|
+
operation_size = get_size(operation)
|
58
|
+
payload_size = get_size(payload)
|
59
|
+
|
60
|
+
@current_buffer_size + operation_size + payload_size < @size_threshold
|
61
|
+
end
|
62
|
+
|
63
|
+
def current_stats
|
64
|
+
{
|
65
|
+
:current_operation_count => @current_operation_count,
|
66
|
+
:current_buffer_size => @current_buffer_size
|
67
|
+
}
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def get_size(str)
|
73
|
+
return 0 unless str
|
74
|
+
str.bytesize
|
75
|
+
end
|
76
|
+
|
77
|
+
def reset
|
78
|
+
@current_operation_count = 0
|
79
|
+
@current_buffer_size = 0
|
80
|
+
@current_data_size = 0
|
81
|
+
|
82
|
+
@buffer = ''
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/utility/constants.rb
CHANGED
data/lib/utility/logger.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'config'
|
7
8
|
require 'logger'
|
8
9
|
require 'active_support/core_ext/module'
|
9
10
|
require 'active_support/core_ext/string/filters'
|
@@ -23,7 +24,7 @@ module Utility
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
28
|
end
|
28
29
|
|
29
30
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,15 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'utility/bulk_queue'
|
8
|
+
require 'utility/common'
|
7
9
|
require 'utility/constants'
|
8
10
|
require 'utility/cron'
|
9
|
-
require 'utility/common'
|
11
|
+
require 'utility/elasticsearch/index/mappings'
|
12
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
|
+
require 'utility/environment'
|
10
14
|
require 'utility/errors'
|
11
15
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
16
|
require 'utility/exception_tracking'
|
14
17
|
require 'utility/extension_mapping_util'
|
15
18
|
require 'utility/logger'
|
16
|
-
require 'utility/elasticsearch/index/mappings'
|
17
|
-
require 'utility/elasticsearch/index/text_analysis_settings'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221107T145613Z
|
4
|
+
version: 8.6.0.4.pre.20221114T235050Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-07 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- lib/core/elastic_connector_actions.rb
|
111
111
|
- lib/core/scheduler.rb
|
112
112
|
- lib/utility.rb
|
113
|
+
- lib/utility/bulk_queue.rb
|
113
114
|
- lib/utility/common.rb
|
114
115
|
- lib/utility/constants.rb
|
115
116
|
- lib/utility/cron.rb
|
@@ -126,7 +127,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
126
127
|
licenses:
|
127
128
|
- Elastic-2.0
|
128
129
|
metadata:
|
129
|
-
revision:
|
130
|
+
revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
|
130
131
|
repository: git@github.com:elastic/ent-search-connectors.git
|
131
132
|
post_install_message:
|
132
133
|
rdoc_options: []
|