connectors_utility 8.6.0.4.pre.20221107T145613Z → 8.6.0.4.pre.20221114T235050Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/scheduler.rb +40 -10
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3b178cb45ca62a666074e0370d28f4b82477d2ee32e6e7a74b24c218f02c121
|
4
|
+
data.tar.gz: 0b99bd3126a5fdc7cad2ee2b9dc5862284992911043fc7c1931aaac9ea3844d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31b38cf34d989cb09c2ab61356f330a22224fe58f76b5a02db12da13ed3d3e6329244cc75b9eeb293312510df9ccb1e0ff615ec65c9dc0e832e8dbe2c5c25328
|
7
|
+
data.tar.gz: e9b2b753f2ac8135372303f750cb6630d364d0af802a7b5d980cb5ea470e57a3d23c949d2d60356e585da7c9d15383528982ecb80d1eaca2bb36f12854633950
|
data/lib/core/connector_settings.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/registry'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -34,13 +35,15 @@ module Core
|
|
34
35
|
new(es_response, connectors_meta)
|
35
36
|
end
|
36
37
|
|
37
|
-
def initialize(es_response, connectors_meta)
|
38
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
39
|
-
@connectors_meta = connectors_meta.with_indifferent_access
|
40
|
-
end
|
41
|
-
|
42
38
|
def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
|
43
|
-
query = {
|
39
|
+
query = {
|
40
|
+
bool: {
|
41
|
+
filter: [
|
42
|
+
{ term: { is_native: true } },
|
43
|
+
{ terms: { service_type: Connectors::REGISTRY.registered_connectors } }
|
44
|
+
]
|
45
|
+
}
|
46
|
+
}
|
44
47
|
fetch_connectors_by_query(query, page_size)
|
45
48
|
end
|
46
49
|
|
@@ -83,23 +86,26 @@ module Core
|
|
83
86
|
end
|
84
87
|
|
85
88
|
def filtering
|
86
|
-
|
89
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
90
|
+
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
91
|
+
|
92
|
+
Utility::Filtering.extract_filter(filtering)
|
87
93
|
end
|
88
94
|
|
89
95
|
def request_pipeline
|
90
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
96
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
91
97
|
end
|
92
98
|
|
93
99
|
def extract_binary_content?
|
94
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
100
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
95
101
|
end
|
96
102
|
|
97
103
|
def reduce_whitespace?
|
98
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
104
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
99
105
|
end
|
100
106
|
|
101
107
|
def run_ml_inference?
|
102
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
108
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
103
109
|
end
|
104
110
|
|
105
111
|
def formatted
|
@@ -116,6 +122,13 @@ module Core
|
|
116
122
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
117
123
|
end
|
118
124
|
|
125
|
+
private
|
126
|
+
|
127
|
+
def initialize(es_response, connectors_meta)
|
128
|
+
@elasticsearch_response = es_response.with_indifferent_access
|
129
|
+
@connectors_meta = connectors_meta.with_indifferent_access
|
130
|
+
end
|
131
|
+
|
119
132
|
def self.fetch_connectors_by_query(query, page_size)
|
120
133
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
121
134
|
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -19,6 +19,12 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class JobNotCreatedError < StandardError
|
23
|
+
def initialize(connector_id, response)
|
24
|
+
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
class ConnectorVersionChangedError < StandardError
|
23
29
|
def initialize(connector_id, seq_no, primary_term)
|
24
30
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -43,10 +49,17 @@ module Core
|
|
43
49
|
end
|
44
50
|
|
45
51
|
def get_connector(connector_id)
|
52
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
46
53
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
47
54
|
end
|
48
55
|
|
56
|
+
def get_job(job_id)
|
57
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
+
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
+
end
|
60
|
+
|
49
61
|
def connectors_meta
|
62
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
50
63
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
51
64
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
52
65
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -65,6 +78,19 @@ module Core
|
|
65
78
|
)
|
66
79
|
end
|
67
80
|
|
81
|
+
def search_jobs(query, page_size, offset)
|
82
|
+
client.search(
|
83
|
+
:index => Utility::Constants::JOB_INDEX,
|
84
|
+
:ignore => 404,
|
85
|
+
:body => {
|
86
|
+
:size => page_size,
|
87
|
+
:from => offset,
|
88
|
+
:query => query,
|
89
|
+
:sort => ['created_at']
|
90
|
+
}
|
91
|
+
)
|
92
|
+
end
|
93
|
+
|
68
94
|
def update_connector_configuration(connector_id, configuration)
|
69
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
70
96
|
end
|
@@ -84,6 +110,28 @@ module Core
|
|
84
110
|
update_connector_configuration(connector_id, payload)
|
85
111
|
end
|
86
112
|
|
113
|
+
def update_filtering_validation(connector_id, filter_validation_results)
|
114
|
+
return if filter_validation_results.empty?
|
115
|
+
|
116
|
+
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
+
|
118
|
+
case filtering
|
119
|
+
when Hash
|
120
|
+
update_filter_validation(filtering, filter_validation_results)
|
121
|
+
when Array
|
122
|
+
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
+
|
124
|
+
filtering.each do |filter|
|
125
|
+
update_filter_validation(filter, filter_validation_results)
|
126
|
+
end
|
127
|
+
else
|
128
|
+
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
+
end
|
134
|
+
|
87
135
|
def claim_job(connector_id)
|
88
136
|
seq_no = nil
|
89
137
|
primary_term = nil
|
@@ -111,24 +159,38 @@ module Core
|
|
111
159
|
)
|
112
160
|
|
113
161
|
body = {
|
114
|
-
:connector_id => connector_id,
|
115
162
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
116
163
|
:worker_hostname => Socket.gethostname,
|
117
164
|
:created_at => Time.now,
|
118
|
-
:
|
165
|
+
:started_at => Time.now,
|
166
|
+
:last_seen => Time.now,
|
167
|
+
:connector => {
|
168
|
+
:id => connector_id,
|
169
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
170
|
+
}
|
119
171
|
}
|
120
172
|
|
121
|
-
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
173
|
+
index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
|
174
|
+
if index_response['result'] == 'created'
|
175
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
176
|
+
return client.get(
|
177
|
+
:index => Utility::Constants::JOB_INDEX,
|
178
|
+
:id => index_response['_id'],
|
179
|
+
:ignore => 404
|
180
|
+
).with_indifferent_access
|
181
|
+
end
|
182
|
+
raise JobNotCreatedError.new(connector_id, index_response)
|
122
183
|
end
|
123
184
|
|
124
185
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
186
|
return [] unless connector_filtering
|
126
187
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
188
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
189
|
+
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
128
190
|
job_filtering << {
|
129
191
|
'domain' => filtering_domain['domain'],
|
130
192
|
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
-
'advanced_snippet' =>
|
193
|
+
'advanced_snippet' => snippet['value'] || snippet,
|
132
194
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
195
|
}
|
134
196
|
end
|
@@ -145,22 +207,33 @@ module Core
|
|
145
207
|
update_connector_fields(connector_id, body)
|
146
208
|
end
|
147
209
|
|
148
|
-
def
|
149
|
-
|
210
|
+
def update_sync(job_id, metadata)
|
211
|
+
body = {
|
212
|
+
:doc => { :last_seen => Time.now }.merge(metadata)
|
213
|
+
}
|
214
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
215
|
+
end
|
216
|
+
|
217
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
218
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
219
|
+
|
220
|
+
metadata ||= {}
|
150
221
|
|
151
222
|
update_connector_fields(connector_id,
|
152
223
|
:last_sync_status => sync_status,
|
153
|
-
:last_sync_error =>
|
154
|
-
:error =>
|
224
|
+
:last_sync_error => error,
|
225
|
+
:error => error,
|
155
226
|
:last_synced => Time.now,
|
156
|
-
:last_indexed_document_count =>
|
157
|
-
:last_deleted_document_count =>
|
227
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
228
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
158
229
|
|
159
230
|
body = {
|
160
231
|
:doc => {
|
161
232
|
:status => sync_status,
|
162
|
-
:completed_at => Time.now
|
163
|
-
|
233
|
+
:completed_at => Time.now,
|
234
|
+
:last_seen => Time.now,
|
235
|
+
:error => error
|
236
|
+
}.merge(metadata)
|
164
237
|
}
|
165
238
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
166
239
|
end
|
@@ -248,12 +321,105 @@ module Core
|
|
248
321
|
:properties => {
|
249
322
|
:api_key_id => { :type => :keyword },
|
250
323
|
:configuration => { :type => :object },
|
251
|
-
:
|
324
|
+
:description => { :type => :text },
|
325
|
+
:error => { :type => :keyword },
|
326
|
+
:features => {
|
327
|
+
:properties => {
|
328
|
+
:filtering_advanced_config => { :type => :boolean },
|
329
|
+
:filtering_rules => { :type => :boolean }
|
330
|
+
}
|
331
|
+
},
|
332
|
+
:filtering => {
|
333
|
+
:properties => {
|
334
|
+
:domain => { :type => :keyword },
|
335
|
+
:active => {
|
336
|
+
:properties => {
|
337
|
+
:rules => {
|
338
|
+
:properties => {
|
339
|
+
:id => { :type => :keyword },
|
340
|
+
:policy => { :type => :keyword },
|
341
|
+
:field => { :type => :keyword },
|
342
|
+
:rule => { :type => :keyword },
|
343
|
+
:value => { :type => :keyword },
|
344
|
+
:order => { :type => :short },
|
345
|
+
:created_at => { :type => :date },
|
346
|
+
:updated_at => { :type => :date }
|
347
|
+
}
|
348
|
+
},
|
349
|
+
:advanced_snippet => {
|
350
|
+
:properties => {
|
351
|
+
:value => { :type => :object },
|
352
|
+
:created_at => { :type => :date },
|
353
|
+
:updated_at => { :type => :date }
|
354
|
+
}
|
355
|
+
},
|
356
|
+
:validation => {
|
357
|
+
:properties => {
|
358
|
+
:state => { :type => :keyword },
|
359
|
+
:errors => {
|
360
|
+
:properties => {
|
361
|
+
:ids => { :type => :keyword },
|
362
|
+
:messages => { :type => :text }
|
363
|
+
}
|
364
|
+
}
|
365
|
+
}
|
366
|
+
}
|
367
|
+
}
|
368
|
+
},
|
369
|
+
:draft => {
|
370
|
+
:properties => {
|
371
|
+
:rules => {
|
372
|
+
:properties => {
|
373
|
+
:id => { :type => :keyword },
|
374
|
+
:policy => { :type => :keyword },
|
375
|
+
:field => { :type => :keyword },
|
376
|
+
:rule => { :type => :keyword },
|
377
|
+
:value => { :type => :keyword },
|
378
|
+
:order => { :type => :short },
|
379
|
+
:created_at => { :type => :date },
|
380
|
+
:updated_at => { :type => :date }
|
381
|
+
}
|
382
|
+
},
|
383
|
+
:advanced_snippet => {
|
384
|
+
:properties => {
|
385
|
+
:value => { :type => :object },
|
386
|
+
:created_at => { :type => :date },
|
387
|
+
:updated_at => { :type => :date }
|
388
|
+
}
|
389
|
+
},
|
390
|
+
:validation => {
|
391
|
+
:properties => {
|
392
|
+
:state => { :type => :keyword },
|
393
|
+
:errors => {
|
394
|
+
:properties => {
|
395
|
+
:ids => { :type => :keyword },
|
396
|
+
:messages => { :type => :text }
|
397
|
+
}
|
398
|
+
}
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
}
|
403
|
+
}
|
404
|
+
},
|
252
405
|
:index_name => { :type => :keyword },
|
406
|
+
:is_native => { :type => :boolean },
|
407
|
+
:language => { :type => :keyword },
|
253
408
|
:last_seen => { :type => :date },
|
409
|
+
:last_sync_error => { :type => :keyword },
|
410
|
+
:last_sync_status => { :type => :keyword },
|
254
411
|
:last_synced => { :type => :date },
|
255
|
-
:
|
256
|
-
:
|
412
|
+
:last_deleted_document_count => { :type => :long },
|
413
|
+
:last_indexed_document_count => { :type => :long },
|
414
|
+
:name => { :type => :keyword },
|
415
|
+
:pipeline => {
|
416
|
+
:properties => {
|
417
|
+
:extract_binary_content => { :type => :boolean },
|
418
|
+
:name => { :type => :keyword },
|
419
|
+
:reduce_whitespace => { :type => :boolean },
|
420
|
+
:run_ml_inference => { :type => :boolean }
|
421
|
+
}
|
422
|
+
},
|
257
423
|
:scheduling => {
|
258
424
|
:properties => {
|
259
425
|
:enabled => { :type => :boolean },
|
@@ -262,9 +428,7 @@ module Core
|
|
262
428
|
},
|
263
429
|
:service_type => { :type => :keyword },
|
264
430
|
:status => { :type => :keyword },
|
265
|
-
:
|
266
|
-
:sync_now => { :type => :boolean },
|
267
|
-
:sync_status => { :type => :keyword }
|
431
|
+
:sync_now => { :type => :boolean }
|
268
432
|
}
|
269
433
|
}
|
270
434
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -275,14 +439,68 @@ module Core
|
|
275
439
|
def ensure_job_index_exists
|
276
440
|
mappings = {
|
277
441
|
:properties => {
|
278
|
-
:
|
279
|
-
:
|
442
|
+
:cancelation_requested_at => { :type => :date },
|
443
|
+
:canceled_at => { :type => :date },
|
444
|
+
:completed_at => { :type => :date },
|
445
|
+
:connector => {
|
446
|
+
:properties => {
|
447
|
+
:configuration => { :type => :object },
|
448
|
+
:filtering => {
|
449
|
+
:properties => {
|
450
|
+
:domain => { :type => :keyword },
|
451
|
+
:rules => {
|
452
|
+
:properties => {
|
453
|
+
:id => { :type => :keyword },
|
454
|
+
:policy => { :type => :keyword },
|
455
|
+
:field => { :type => :keyword },
|
456
|
+
:rule => { :type => :keyword },
|
457
|
+
:value => { :type => :keyword },
|
458
|
+
:order => { :type => :short },
|
459
|
+
:created_at => { :type => :date },
|
460
|
+
:updated_at => { :type => :date }
|
461
|
+
}
|
462
|
+
},
|
463
|
+
:advanced_snippet => {
|
464
|
+
:properties => {
|
465
|
+
:value => { :type => :object },
|
466
|
+
:created_at => { :type => :date },
|
467
|
+
:updated_at => { :type => :date }
|
468
|
+
}
|
469
|
+
},
|
470
|
+
:warnings => {
|
471
|
+
:properties => {
|
472
|
+
:ids => { :type => :keyword },
|
473
|
+
:messages => { :type => :text }
|
474
|
+
}
|
475
|
+
}
|
476
|
+
}
|
477
|
+
},
|
478
|
+
:id => { :type => :keyword },
|
479
|
+
:index_name => { :type => :keyword },
|
480
|
+
:language => { :type => :keyword },
|
481
|
+
:pipeline => {
|
482
|
+
:properties => {
|
483
|
+
:extract_binary_content => { :type => :boolean },
|
484
|
+
:name => { :type => :keyword },
|
485
|
+
:reduce_whitespace => { :type => :boolean },
|
486
|
+
:run_ml_inference => { :type => :boolean }
|
487
|
+
}
|
488
|
+
},
|
489
|
+
:service_type => { :type => :keyword }
|
490
|
+
}
|
491
|
+
},
|
492
|
+
:created_at => { :type => :date },
|
493
|
+
:deleted_document_count => { :type => :integer },
|
280
494
|
:error => { :type => :text },
|
281
|
-
:worker_hostname => { :type => :keyword },
|
282
495
|
:indexed_document_count => { :type => :integer },
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
496
|
+
:indexed_document_volume => { :type => :integer },
|
497
|
+
:last_seen => { :type => :date },
|
498
|
+
:metadata => { :type => :object },
|
499
|
+
:started_at => { :type => :date },
|
500
|
+
:status => { :type => :keyword },
|
501
|
+
:total_document_count => { :type => :integer },
|
502
|
+
:trigger_method => { :type => :keyword },
|
503
|
+
:worker_hostname => { :type => :keyword }
|
286
504
|
}
|
287
505
|
}
|
288
506
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
@@ -313,8 +531,20 @@ module Core
|
|
313
531
|
end
|
314
532
|
end
|
315
533
|
|
534
|
+
def document_count(index_name)
|
535
|
+
client.count(:index => index_name)['count']
|
536
|
+
end
|
537
|
+
|
316
538
|
private
|
317
539
|
|
540
|
+
def should_update_validations?(domain_validations, filtering)
|
541
|
+
domains_present = filtering.collect { |filter| filter[:domain] }
|
542
|
+
domains_to_update = domain_validations.keys
|
543
|
+
|
544
|
+
# non-empty intersection -> domains to update present
|
545
|
+
!(domains_present & domains_to_update).empty?
|
546
|
+
end
|
547
|
+
|
318
548
|
def client
|
319
549
|
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
550
|
end
|
@@ -324,6 +554,15 @@ module Core
|
|
324
554
|
index_version = index_versions.max # gets the largest suffix number
|
325
555
|
"#{alias_name}-v#{index_version}"
|
326
556
|
end
|
557
|
+
|
558
|
+
def update_filter_validation(filter, domain_validations)
|
559
|
+
domain = filter[:domain]
|
560
|
+
|
561
|
+
if domain_validations.key?(domain)
|
562
|
+
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
563
|
+
filter.deep_merge!(new_validation_state)
|
564
|
+
end
|
565
|
+
end
|
327
566
|
end
|
328
567
|
end
|
329
568
|
end
|
data/lib/core/scheduler.rb
CHANGED
@@ -10,6 +10,7 @@ require 'time'
|
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
12
|
require 'core/elastic_connector_actions'
|
13
|
+
require 'core/filtering/validation_status'
|
13
14
|
require 'utility/cron'
|
14
15
|
require 'utility/logger'
|
15
16
|
require 'utility/exception_tracking'
|
@@ -38,15 +39,18 @@ module Core
|
|
38
39
|
if configuration_triggered?(cs)
|
39
40
|
yield cs, :configuration
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
if filtering_validation_triggered?(cs)
|
43
|
+
yield cs, :filter_validation
|
44
|
+
end
|
44
45
|
end
|
45
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
47
|
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
47
48
|
rescue StandardError => e
|
48
49
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
49
50
|
ensure
|
51
|
+
if @is_shutting_down
|
52
|
+
break
|
53
|
+
end
|
50
54
|
if @poll_interval > 0 && !@is_shutting_down
|
51
55
|
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
52
56
|
sleep(@poll_interval)
|
@@ -62,8 +66,6 @@ module Core
|
|
62
66
|
private
|
63
67
|
|
64
68
|
def sync_triggered?(connector_settings)
|
65
|
-
return false unless connector_registered?(connector_settings.service_type)
|
66
|
-
|
67
69
|
unless connector_settings.valid_index_name?
|
68
70
|
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
|
69
71
|
return false
|
@@ -129,8 +131,6 @@ module Core
|
|
129
131
|
end
|
130
132
|
|
131
133
|
def heartbeat_triggered?(connector_settings)
|
132
|
-
return false unless connector_registered?(connector_settings.service_type)
|
133
|
-
|
134
134
|
last_seen = connector_settings[:last_seen]
|
135
135
|
return true if last_seen.nil? || last_seen.empty?
|
136
136
|
last_seen = begin
|
@@ -144,11 +144,41 @@ module Core
|
|
144
144
|
end
|
145
145
|
|
146
146
|
def configuration_triggered?(connector_settings)
|
147
|
-
|
148
|
-
|
147
|
+
connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
|
148
|
+
end
|
149
|
+
|
150
|
+
def filtering_validation_triggered?(connector_settings)
|
151
|
+
filtering = connector_settings.filtering
|
152
|
+
|
153
|
+
unless filtering.present?
|
154
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
|
155
|
+
|
156
|
+
return false
|
149
157
|
end
|
150
158
|
|
151
|
-
|
159
|
+
draft_filters = filtering[:draft]
|
160
|
+
|
161
|
+
unless draft_filters.present?
|
162
|
+
Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
|
163
|
+
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
validation = draft_filters[:validation]
|
168
|
+
|
169
|
+
unless validation.present?
|
170
|
+
Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
|
171
|
+
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
|
175
|
+
unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
|
176
|
+
Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
|
177
|
+
|
178
|
+
return false
|
179
|
+
end
|
180
|
+
|
181
|
+
true
|
152
182
|
end
|
153
183
|
|
154
184
|
def connector_registered?(service_type)
|
data/lib/utility/bulk_queue.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
class BulkQueue
|
11
|
+
class QueueOverflowError < StandardError; end
|
12
|
+
|
13
|
+
# 500 items or 5MB
|
14
|
+
def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
|
15
|
+
@operation_count_threshold = operation_count_threshold.freeze
|
16
|
+
@size_threshold = size_threshold.freeze
|
17
|
+
|
18
|
+
@buffer = ''
|
19
|
+
|
20
|
+
@current_operation_count = 0
|
21
|
+
|
22
|
+
@current_buffer_size = 0
|
23
|
+
@current_data_size = 0
|
24
|
+
end
|
25
|
+
|
26
|
+
def pop_all
|
27
|
+
result = @buffer
|
28
|
+
|
29
|
+
reset
|
30
|
+
|
31
|
+
result
|
32
|
+
end
|
33
|
+
|
34
|
+
def add(operation, payload = nil)
|
35
|
+
raise QueueOverflowError unless will_fit?(operation, payload)
|
36
|
+
|
37
|
+
operation_size = get_size(operation)
|
38
|
+
payload_size = get_size(payload)
|
39
|
+
|
40
|
+
@current_operation_count += 1
|
41
|
+
@current_buffer_size += operation_size
|
42
|
+
@current_buffer_size += payload_size
|
43
|
+
@current_data_size += payload_size
|
44
|
+
|
45
|
+
@buffer << operation
|
46
|
+
@buffer << "\n"
|
47
|
+
|
48
|
+
if payload
|
49
|
+
@buffer << payload
|
50
|
+
@buffer << "\n"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def will_fit?(operation, payload = nil)
|
55
|
+
return false if @current_operation_count + 1 > @operation_count_threshold
|
56
|
+
|
57
|
+
operation_size = get_size(operation)
|
58
|
+
payload_size = get_size(payload)
|
59
|
+
|
60
|
+
@current_buffer_size + operation_size + payload_size < @size_threshold
|
61
|
+
end
|
62
|
+
|
63
|
+
def current_stats
|
64
|
+
{
|
65
|
+
:current_operation_count => @current_operation_count,
|
66
|
+
:current_buffer_size => @current_buffer_size
|
67
|
+
}
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def get_size(str)
|
73
|
+
return 0 unless str
|
74
|
+
str.bytesize
|
75
|
+
end
|
76
|
+
|
77
|
+
def reset
|
78
|
+
@current_operation_count = 0
|
79
|
+
@current_buffer_size = 0
|
80
|
+
@current_data_size = 0
|
81
|
+
|
82
|
+
@buffer = ''
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/utility/constants.rb
CHANGED
data/lib/utility/logger.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'config'
|
7
8
|
require 'logger'
|
8
9
|
require 'active_support/core_ext/module'
|
9
10
|
require 'active_support/core_ext/string/filters'
|
@@ -23,7 +24,7 @@ module Utility
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
28
|
end
|
28
29
|
|
29
30
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,14 +4,15 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'utility/bulk_queue'
|
8
|
+
require 'utility/common'
|
7
9
|
require 'utility/constants'
|
8
10
|
require 'utility/cron'
|
9
|
-
require 'utility/common'
|
11
|
+
require 'utility/elasticsearch/index/mappings'
|
12
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
|
+
require 'utility/environment'
|
10
14
|
require 'utility/errors'
|
11
15
|
require 'utility/es_client'
|
12
|
-
require 'utility/environment'
|
13
16
|
require 'utility/exception_tracking'
|
14
17
|
require 'utility/extension_mapping_util'
|
15
18
|
require 'utility/logger'
|
16
|
-
require 'utility/elasticsearch/index/mappings'
|
17
|
-
require 'utility/elasticsearch/index/text_analysis_settings'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221107T145613Z
|
4
|
+
version: 8.6.0.4.pre.20221114T235050Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-07 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- lib/core/elastic_connector_actions.rb
|
111
111
|
- lib/core/scheduler.rb
|
112
112
|
- lib/utility.rb
|
113
|
+
- lib/utility/bulk_queue.rb
|
113
114
|
- lib/utility/common.rb
|
114
115
|
- lib/utility/constants.rb
|
115
116
|
- lib/utility/cron.rb
|
@@ -126,7 +127,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
126
127
|
licenses:
|
127
128
|
- Elastic-2.0
|
128
129
|
metadata:
|
129
|
-
revision:
|
130
|
+
revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
|
130
131
|
repository: git@github.com:elastic/ent-search-connectors.git
|
131
132
|
post_install_message:
|
132
133
|
rdoc_options: []
|