connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +4 -0
- data/lib/app/dispatcher.rb +42 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +114 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +72 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +25 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -19,6 +19,12 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
# Raised when the index request for a new sync job does not report the job as created.
class JobNotCreatedError < StandardError
  # connector_id - id of the connector the job was meant for
  # response     - response object returned by the index request
  attr_reader :connector_id, :response

  # Keeps both arguments available to rescuers in addition to embedding them in the message.
  def initialize(connector_id, response)
    @connector_id = connector_id
    @response = response
    super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
  end
end
|
27
|
+
|
22
28
|
class ConnectorVersionChangedError < StandardError
|
23
29
|
def initialize(connector_id, seq_no, primary_term)
|
24
30
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -43,10 +49,17 @@ module Core
|
|
43
49
|
end
|
44
50
|
|
45
51
|
# Fetches the document with id `connector_id` from the connectors index.
# The request is sent with :ignore => 404; the client's response is returned
# wrapped with indifferent access.
def get_connector(connector_id)
  # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
  response = client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404)
  response.with_indifferent_access
end
|
48
55
|
|
56
|
+
# Fetches the document with id `job_id` from the job index.
# The request is sent with :ignore => 404; the client's response is returned
# wrapped with indifferent access.
def get_job(job_id)
  # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
  response = client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404)
  response.with_indifferent_access
end
|
60
|
+
|
49
61
|
def connectors_meta
|
62
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
50
63
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
51
64
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
52
65
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -65,6 +78,19 @@ module Core
|
|
65
78
|
)
|
66
79
|
end
|
67
80
|
|
81
|
+
# Searches the job index with `query` and returns the client's search response.
#
# page_size - sent as :size
# offset    - sent as :from
#
# Hits are sorted by 'created_at'; the request is sent with :ignore => 404.
def search_jobs(query, page_size, offset)
  search_body = {
    :size => page_size,
    :from => offset,
    :query => query,
    :sort => ['created_at']
  }

  client.search(:index => Utility::Constants::JOB_INDEX, :ignore => 404, :body => search_body)
end
|
93
|
+
|
68
94
|
# Writes `configuration` into the :configuration field of the connector document.
def update_connector_configuration(connector_id, configuration)
  fields = { :configuration => configuration }
  update_connector_fields(connector_id, fields)
end
|
@@ -84,11 +110,57 @@ module Core
|
|
84
110
|
update_connector_configuration(connector_id, payload)
|
85
111
|
end
|
86
112
|
|
87
|
-
def
|
113
|
+
# Stores validation results in the draft filtering of a connector.
#
# connector_id              - id of the connector document to update
# filter_validation_results - hash keyed by filtering domain; nothing happens when it is empty
#
# The connector's current :filtering is read first. It may be a single filter (Hash)
# or a list of filters (Array); any other shape is logged and skipped. For a list,
# the update is skipped entirely when none of its domains has a validation result.
def update_filtering_validation(connector_id, filter_validation_results)
  return if filter_validation_results.empty?

  filtering = get_connector(connector_id).dig(:_source, :filtering)

  if filtering.is_a?(Hash)
    update_filter_validation(filtering, filter_validation_results)
  elsif filtering.is_a?(Array)
    return unless should_update_validations?(filter_validation_results, filtering)

    filtering.each { |filter| update_filter_validation(filter, filter_validation_results) }
  else
    Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
    return
  end

  # the filters were modified in place above, so the whole structure is written back
  update_connector_fields(connector_id, { :filtering => filtering })
end
|
134
|
+
|
135
|
+
# Sets the connector's sync_now flag and stamps last_synced with the current time.
# The seq_no / primary_term obtained from connector_with_concurrency_control are
# handed to update_connector_fields together with the new values.
def update_connector_sync_now(connector_id, sync_now)
  versioned = connector_with_concurrency_control(connector_id)
  fields = { sync_now: sync_now, last_synced: Time.now }

  update_connector_fields(connector_id, fields, versioned[:seq_no], versioned[:primary_term])
end
|
147
|
+
|
148
|
+
# Sets the connector's last_sync_status.
# The seq_no / primary_term obtained from connector_with_concurrency_control are
# handed to update_connector_fields together with the new value.
def update_connector_last_sync_status(connector_id, last_sync_status)
  versioned = connector_with_concurrency_control(connector_id)
  fields = { last_sync_status: last_sync_status }

  update_connector_fields(connector_id, fields, versioned[:seq_no], versioned[:primary_term])
end
|
158
|
+
|
159
|
+
def connector_with_concurrency_control(connector_id)
|
88
160
|
seq_no = nil
|
89
161
|
primary_term = nil
|
90
|
-
|
91
|
-
|
162
|
+
|
163
|
+
doc = client.get(
|
92
164
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
93
165
|
:id => connector_id,
|
94
166
|
:ignore => 404,
|
@@ -96,39 +168,42 @@ module Core
|
|
96
168
|
).tap do |response|
|
97
169
|
seq_no = response['_seq_no']
|
98
170
|
primary_term = response['_primary_term']
|
99
|
-
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
100
171
|
end
|
101
|
-
if sync_in_progress
|
102
|
-
raise JobAlreadyRunningError.new(connector_id)
|
103
|
-
end
|
104
|
-
update_connector_fields(
|
105
|
-
connector_id,
|
106
|
-
{ :sync_now => false,
|
107
|
-
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
-
:last_synced => Time.now },
|
109
|
-
seq_no,
|
110
|
-
primary_term
|
111
|
-
)
|
112
172
|
|
173
|
+
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
174
|
+
end
|
175
|
+
|
176
|
+
def create_job(connector_settings:)
|
113
177
|
body = {
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
118
|
-
|
178
|
+
status: Connectors::SyncStatus::PENDING,
|
179
|
+
created_at: Time.now,
|
180
|
+
last_seen: Time.now,
|
181
|
+
connector: {
|
182
|
+
id: connector_settings.id,
|
183
|
+
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
184
|
+
index_name: connector_settings.index_name,
|
185
|
+
language: connector_settings[:language],
|
186
|
+
pipeline: connector_settings[:pipeline],
|
187
|
+
service_type: connector_settings.service_type
|
188
|
+
}
|
119
189
|
}
|
120
190
|
|
121
|
-
client.index(:
|
191
|
+
index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
|
192
|
+
|
193
|
+
return index_response if index_response['result'] == 'created'
|
194
|
+
|
195
|
+
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
122
196
|
end
|
123
197
|
|
124
198
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
199
|
return [] unless connector_filtering
|
126
200
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
201
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
202
|
+
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
128
203
|
job_filtering << {
|
129
204
|
'domain' => filtering_domain['domain'],
|
130
205
|
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
-
'advanced_snippet' =>
|
206
|
+
'advanced_snippet' => snippet['value'] || snippet,
|
132
207
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
208
|
}
|
134
209
|
end
|
@@ -145,22 +220,33 @@ module Core
|
|
145
220
|
update_connector_fields(connector_id, body)
|
146
221
|
end
|
147
222
|
|
148
|
-
def
|
149
|
-
|
223
|
+
# Partially updates the job document `job_id`: sets :last_seen to the current time
# and merges `metadata` into the update.
#
# job_id   - id of the document in the job index
# metadata - hash of additional fields; nil is treated as empty, as complete_sync does.
#            On a key collision the value from metadata wins over :last_seen.
#
# Returns the client's update response.
def update_sync(job_id, metadata)
  body = {
    :doc => { :last_seen => Time.now }.merge(metadata || {})
  }
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
end
|
229
|
+
|
230
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
+
|
233
|
+
metadata ||= {}
|
150
234
|
|
151
235
|
update_connector_fields(connector_id,
|
152
236
|
:last_sync_status => sync_status,
|
153
|
-
:last_sync_error =>
|
154
|
-
:error =>
|
237
|
+
:last_sync_error => error,
|
238
|
+
:error => error,
|
155
239
|
:last_synced => Time.now,
|
156
|
-
:last_indexed_document_count =>
|
157
|
-
:last_deleted_document_count =>
|
240
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
241
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
158
242
|
|
159
243
|
body = {
|
160
244
|
:doc => {
|
161
245
|
:status => sync_status,
|
162
|
-
:completed_at => Time.now
|
163
|
-
|
246
|
+
:completed_at => Time.now,
|
247
|
+
:last_seen => Time.now,
|
248
|
+
:error => error
|
249
|
+
}.merge(metadata)
|
164
250
|
}
|
165
251
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
166
252
|
end
|
@@ -248,12 +334,105 @@ module Core
|
|
248
334
|
:properties => {
|
249
335
|
:api_key_id => { :type => :keyword },
|
250
336
|
:configuration => { :type => :object },
|
251
|
-
:
|
337
|
+
:description => { :type => :text },
|
338
|
+
:error => { :type => :keyword },
|
339
|
+
:features => {
|
340
|
+
:properties => {
|
341
|
+
:filtering_advanced_config => { :type => :boolean },
|
342
|
+
:filtering_rules => { :type => :boolean }
|
343
|
+
}
|
344
|
+
},
|
345
|
+
:filtering => {
|
346
|
+
:properties => {
|
347
|
+
:domain => { :type => :keyword },
|
348
|
+
:active => {
|
349
|
+
:properties => {
|
350
|
+
:rules => {
|
351
|
+
:properties => {
|
352
|
+
:id => { :type => :keyword },
|
353
|
+
:policy => { :type => :keyword },
|
354
|
+
:field => { :type => :keyword },
|
355
|
+
:rule => { :type => :keyword },
|
356
|
+
:value => { :type => :keyword },
|
357
|
+
:order => { :type => :short },
|
358
|
+
:created_at => { :type => :date },
|
359
|
+
:updated_at => { :type => :date }
|
360
|
+
}
|
361
|
+
},
|
362
|
+
:advanced_snippet => {
|
363
|
+
:properties => {
|
364
|
+
:value => { :type => :object },
|
365
|
+
:created_at => { :type => :date },
|
366
|
+
:updated_at => { :type => :date }
|
367
|
+
}
|
368
|
+
},
|
369
|
+
:validation => {
|
370
|
+
:properties => {
|
371
|
+
:state => { :type => :keyword },
|
372
|
+
:errors => {
|
373
|
+
:properties => {
|
374
|
+
:ids => { :type => :keyword },
|
375
|
+
:messages => { :type => :text }
|
376
|
+
}
|
377
|
+
}
|
378
|
+
}
|
379
|
+
}
|
380
|
+
}
|
381
|
+
},
|
382
|
+
:draft => {
|
383
|
+
:properties => {
|
384
|
+
:rules => {
|
385
|
+
:properties => {
|
386
|
+
:id => { :type => :keyword },
|
387
|
+
:policy => { :type => :keyword },
|
388
|
+
:field => { :type => :keyword },
|
389
|
+
:rule => { :type => :keyword },
|
390
|
+
:value => { :type => :keyword },
|
391
|
+
:order => { :type => :short },
|
392
|
+
:created_at => { :type => :date },
|
393
|
+
:updated_at => { :type => :date }
|
394
|
+
}
|
395
|
+
},
|
396
|
+
:advanced_snippet => {
|
397
|
+
:properties => {
|
398
|
+
:value => { :type => :object },
|
399
|
+
:created_at => { :type => :date },
|
400
|
+
:updated_at => { :type => :date }
|
401
|
+
}
|
402
|
+
},
|
403
|
+
:validation => {
|
404
|
+
:properties => {
|
405
|
+
:state => { :type => :keyword },
|
406
|
+
:errors => {
|
407
|
+
:properties => {
|
408
|
+
:ids => { :type => :keyword },
|
409
|
+
:messages => { :type => :text }
|
410
|
+
}
|
411
|
+
}
|
412
|
+
}
|
413
|
+
}
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
},
|
252
418
|
:index_name => { :type => :keyword },
|
419
|
+
:is_native => { :type => :boolean },
|
420
|
+
:language => { :type => :keyword },
|
253
421
|
:last_seen => { :type => :date },
|
422
|
+
:last_sync_error => { :type => :keyword },
|
423
|
+
:last_sync_status => { :type => :keyword },
|
254
424
|
:last_synced => { :type => :date },
|
255
|
-
:
|
256
|
-
:
|
425
|
+
:last_deleted_document_count => { :type => :long },
|
426
|
+
:last_indexed_document_count => { :type => :long },
|
427
|
+
:name => { :type => :keyword },
|
428
|
+
:pipeline => {
|
429
|
+
:properties => {
|
430
|
+
:extract_binary_content => { :type => :boolean },
|
431
|
+
:name => { :type => :keyword },
|
432
|
+
:reduce_whitespace => { :type => :boolean },
|
433
|
+
:run_ml_inference => { :type => :boolean }
|
434
|
+
}
|
435
|
+
},
|
257
436
|
:scheduling => {
|
258
437
|
:properties => {
|
259
438
|
:enabled => { :type => :boolean },
|
@@ -262,9 +441,7 @@ module Core
|
|
262
441
|
},
|
263
442
|
:service_type => { :type => :keyword },
|
264
443
|
:status => { :type => :keyword },
|
265
|
-
:
|
266
|
-
:sync_now => { :type => :boolean },
|
267
|
-
:sync_status => { :type => :keyword }
|
444
|
+
:sync_now => { :type => :boolean }
|
268
445
|
}
|
269
446
|
}
|
270
447
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -275,55 +452,139 @@ module Core
|
|
275
452
|
def ensure_job_index_exists
|
276
453
|
mappings = {
|
277
454
|
:properties => {
|
278
|
-
:
|
279
|
-
:
|
455
|
+
:cancelation_requested_at => { :type => :date },
|
456
|
+
:canceled_at => { :type => :date },
|
457
|
+
:completed_at => { :type => :date },
|
458
|
+
:connector => {
|
459
|
+
:properties => {
|
460
|
+
:configuration => { :type => :object },
|
461
|
+
:filtering => {
|
462
|
+
:properties => {
|
463
|
+
:domain => { :type => :keyword },
|
464
|
+
:rules => {
|
465
|
+
:properties => {
|
466
|
+
:id => { :type => :keyword },
|
467
|
+
:policy => { :type => :keyword },
|
468
|
+
:field => { :type => :keyword },
|
469
|
+
:rule => { :type => :keyword },
|
470
|
+
:value => { :type => :keyword },
|
471
|
+
:order => { :type => :short },
|
472
|
+
:created_at => { :type => :date },
|
473
|
+
:updated_at => { :type => :date }
|
474
|
+
}
|
475
|
+
},
|
476
|
+
:advanced_snippet => {
|
477
|
+
:properties => {
|
478
|
+
:value => { :type => :object },
|
479
|
+
:created_at => { :type => :date },
|
480
|
+
:updated_at => { :type => :date }
|
481
|
+
}
|
482
|
+
},
|
483
|
+
:warnings => {
|
484
|
+
:properties => {
|
485
|
+
:ids => { :type => :keyword },
|
486
|
+
:messages => { :type => :text }
|
487
|
+
}
|
488
|
+
}
|
489
|
+
}
|
490
|
+
},
|
491
|
+
:id => { :type => :keyword },
|
492
|
+
:index_name => { :type => :keyword },
|
493
|
+
:language => { :type => :keyword },
|
494
|
+
:pipeline => {
|
495
|
+
:properties => {
|
496
|
+
:extract_binary_content => { :type => :boolean },
|
497
|
+
:name => { :type => :keyword },
|
498
|
+
:reduce_whitespace => { :type => :boolean },
|
499
|
+
:run_ml_inference => { :type => :boolean }
|
500
|
+
}
|
501
|
+
},
|
502
|
+
:service_type => { :type => :keyword }
|
503
|
+
}
|
504
|
+
},
|
505
|
+
:created_at => { :type => :date },
|
506
|
+
:deleted_document_count => { :type => :integer },
|
280
507
|
:error => { :type => :text },
|
281
|
-
:worker_hostname => { :type => :keyword },
|
282
508
|
:indexed_document_count => { :type => :integer },
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
509
|
+
:indexed_document_volume => { :type => :integer },
|
510
|
+
:last_seen => { :type => :date },
|
511
|
+
:metadata => { :type => :object },
|
512
|
+
:started_at => { :type => :date },
|
513
|
+
:status => { :type => :keyword },
|
514
|
+
:total_document_count => { :type => :integer },
|
515
|
+
:trigger_method => { :type => :keyword },
|
516
|
+
:worker_hostname => { :type => :keyword }
|
286
517
|
}
|
287
518
|
}
|
288
519
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
289
520
|
end
|
290
521
|
|
291
522
|
# Applies the partial document `doc` to the connector `connector_id`.
# seq_no and primary_term are forwarded unchanged to update_doc_fields.
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
  connectors_index = Utility::Constants::CONNECTORS_INDEX
  update_doc_fields(connectors_index, connector_id, doc, seq_no, primary_term)
end
|
525
|
+
|
526
|
+
# Applies the partial document `doc` to the job `job_id`.
# seq_no and primary_term are forwarded unchanged to update_doc_fields.
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
  job_index = Utility::Constants::JOB_INDEX
  update_doc_fields(job_index, job_id, doc, seq_no, primary_term)
end
|
529
|
+
|
530
|
+
# Returns the 'count' value reported for `index_name`.
# The index is refreshed first, before the count request is issued.
def document_count(index_name)
  client.indices.refresh(:index => index_name)

  count_response = client.count(:index => index_name)
  count_response['count']
end
|
534
|
+
|
535
|
+
private
|
536
|
+
|
537
|
+
# Tells whether at least one filter in `filtering` belongs to a domain that has a
# validation result.
#
# domain_validations - hash keyed by domain
# filtering          - list of filters, each read through filter[:domain]
#
# Uses the same key? lookup as update_filter_validation, so this gate and the actual
# update agree on what counts as a match, and it stops at the first hit instead of
# materialising both domain lists and their intersection.
def should_update_validations?(domain_validations, filtering)
  filtering.any? { |filter| domain_validations.key?(filter[:domain]) }
end
|
544
|
+
|
545
|
+
# Memoized Utility::EsClient built from App::Config[:elasticsearch].
def client
  return @client if @client

  @client = Utility::EsClient.new(App::Config[:elasticsearch])
end
|
548
|
+
|
549
|
+
# Returns "<alias_name>-v<N>" where N is the largest integer obtained from the given
# index names after removing the "<alias_name>-v" marker from each of them.
def get_latest_index_in_alias(alias_name, indicies)
  version_marker = "#{alias_name}-v"
  latest_version = indicies.map { |index_name| index_name.gsub(version_marker, '').to_i }.max # gets the largest suffix number

  "#{version_marker}#{latest_version}"
end
|
554
|
+
|
555
|
+
# Merges the validation result for the filter's domain into filter[:draft][:validation],
# modifying `filter` in place. Filters whose domain has no entry in
# `domain_validations` are left untouched.
def update_filter_validation(filter, domain_validations)
  domain = filter[:domain]
  return unless domain_validations.key?(domain)

  filter.deep_merge!({ :draft => { :validation => domain_validations[domain] } })
end
|
563
|
+
|
564
|
+
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
|
292
565
|
return if doc.empty?
|
293
566
|
update_args = {
|
294
|
-
:index =>
|
295
|
-
:id =>
|
567
|
+
:index => index,
|
568
|
+
:id => id,
|
296
569
|
:body => { :doc => doc },
|
297
570
|
:refresh => true,
|
298
571
|
:retry_on_conflict => 3
|
299
572
|
}
|
300
|
-
|
301
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
573
|
+
|
302
574
|
if seq_no && primary_term
|
303
575
|
update_args[:if_seq_no] = seq_no
|
304
576
|
update_args[:if_primary_term] = primary_term
|
305
577
|
update_args.delete(:retry_on_conflict)
|
306
578
|
end
|
579
|
+
|
307
580
|
begin
|
308
581
|
client.update(update_args)
|
309
582
|
rescue Elastic::Transport::Transport::Errors::Conflict
|
310
583
|
# VersionConflictException
|
311
584
|
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
312
|
-
raise ConnectorVersionChangedError.new(
|
585
|
+
raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
|
313
586
|
end
|
314
587
|
end
|
315
|
-
|
316
|
-
private
|
317
|
-
|
318
|
-
def client
|
319
|
-
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
|
-
end
|
321
|
-
|
322
|
-
def get_latest_index_in_alias(alias_name, indicies)
|
323
|
-
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
324
|
-
index_version = index_versions.max # gets the largest suffix number
|
325
|
-
"#{alias_name}-v#{index_version}"
|
326
|
-
end
|
327
588
|
end
|
328
589
|
end
|
329
590
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering'
|
10
|
+
require 'utility/filtering'
|
11
|
+
|
12
|
+
module Core
  module Filtering
    # Decides, for each document, which simple filtering rule of a sync job applies to it.
    class PostProcessEngine
      # SimpleRule instances in ascending 'order', without the default rule.
      attr_reader :rules

      # job_description - job hash; the filtering is read from
      #                   job_description['connector']['filtering']
      def initialize(job_description)
        @rules = ordered_rules(job_description.dig('connector', 'filtering'))
      end

      # Returns a PostProcessResult pairing `document` with the first rule that matches
      # it, or with SimpleRule::DEFAULT_RULE when no rule matches.
      def process(document)
        return PostProcessResult.new(document, SimpleRule::DEFAULT_RULE) if @rules.empty?

        # rules are matched against string keys; convert once rather than once per rule
        stringified_document = document.stringify_keys
        matching_rule = @rules.find { |rule| rule.match?(stringified_document) } || SimpleRule::DEFAULT_RULE

        PostProcessResult.new(document, matching_rule)
      end

      private

      # Builds the rule list from the job filtering. A job may carry no rules at all
      # (nil), which yields an empty list. The default rule is removed before sorting
      # so that only the remaining rules need a comparable 'order'.
      def ordered_rules(job_filtering)
        job_rules = Utility::Filtering.extract_filter(job_filtering)['rules'] || []

        job_rules
          .reject { |rule| rule['id'] == Core::Filtering::SimpleRule::DEFAULT_RULE_ID }
          .sort_by { |rule| rule['order'] }
          .map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
  module Filtering
    # Outcome of post-processing one document: the document plus the rule that decided its fate.
    class PostProcessResult
      attr_reader :document, :matching_rule

      # document      - the processed document; its id is looked up for the debug log
      # matching_rule - rule object responding to id, policy and is_include?
      def initialize(document, matching_rule)
        @document = document
        @matching_rule = matching_rule

        # documents may arrive with string or symbol keys; check both so the id is not logged as blank
        document_id = document['id'] || document[:id]
        Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
      end

      # Delegates to the matching rule.
      def is_include?
        matching_rule.is_include?
      end
    end
  end
end
|