connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +4 -0
- data/lib/app/dispatcher.rb +42 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +114 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +72 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +25 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -19,6 +19,12 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class JobNotCreatedError < StandardError
|
23
|
+
def initialize(connector_id, response)
|
24
|
+
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
class ConnectorVersionChangedError < StandardError
|
23
29
|
def initialize(connector_id, seq_no, primary_term)
|
24
30
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -43,10 +49,17 @@ module Core
|
|
43
49
|
end
|
44
50
|
|
45
51
|
def get_connector(connector_id)
|
52
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
46
53
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
47
54
|
end
|
48
55
|
|
56
|
+
def get_job(job_id)
|
57
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
+
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
+
end
|
60
|
+
|
49
61
|
def connectors_meta
|
62
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
50
63
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
51
64
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
52
65
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -65,6 +78,19 @@ module Core
|
|
65
78
|
)
|
66
79
|
end
|
67
80
|
|
81
|
+
def search_jobs(query, page_size, offset)
|
82
|
+
client.search(
|
83
|
+
:index => Utility::Constants::JOB_INDEX,
|
84
|
+
:ignore => 404,
|
85
|
+
:body => {
|
86
|
+
:size => page_size,
|
87
|
+
:from => offset,
|
88
|
+
:query => query,
|
89
|
+
:sort => ['created_at']
|
90
|
+
}
|
91
|
+
)
|
92
|
+
end
|
93
|
+
|
68
94
|
def update_connector_configuration(connector_id, configuration)
|
69
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
70
96
|
end
|
@@ -84,11 +110,57 @@ module Core
|
|
84
110
|
update_connector_configuration(connector_id, payload)
|
85
111
|
end
|
86
112
|
|
87
|
-
def
|
113
|
+
def update_filtering_validation(connector_id, filter_validation_results)
|
114
|
+
return if filter_validation_results.empty?
|
115
|
+
|
116
|
+
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
+
|
118
|
+
case filtering
|
119
|
+
when Hash
|
120
|
+
update_filter_validation(filtering, filter_validation_results)
|
121
|
+
when Array
|
122
|
+
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
+
|
124
|
+
filtering.each do |filter|
|
125
|
+
update_filter_validation(filter, filter_validation_results)
|
126
|
+
end
|
127
|
+
else
|
128
|
+
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
+
end
|
134
|
+
|
135
|
+
def update_connector_sync_now(connector_id, sync_now)
|
136
|
+
doc = connector_with_concurrency_control(connector_id)
|
137
|
+
|
138
|
+
body = { sync_now: sync_now, last_synced: Time.now }
|
139
|
+
|
140
|
+
update_connector_fields(
|
141
|
+
connector_id,
|
142
|
+
body,
|
143
|
+
doc[:seq_no],
|
144
|
+
doc[:primary_term]
|
145
|
+
)
|
146
|
+
end
|
147
|
+
|
148
|
+
def update_connector_last_sync_status(connector_id, last_sync_status)
|
149
|
+
doc = connector_with_concurrency_control(connector_id)
|
150
|
+
|
151
|
+
update_connector_fields(
|
152
|
+
connector_id,
|
153
|
+
{ last_sync_status: last_sync_status },
|
154
|
+
doc[:seq_no],
|
155
|
+
doc[:primary_term]
|
156
|
+
)
|
157
|
+
end
|
158
|
+
|
159
|
+
def connector_with_concurrency_control(connector_id)
|
88
160
|
seq_no = nil
|
89
161
|
primary_term = nil
|
90
|
-
|
91
|
-
|
162
|
+
|
163
|
+
doc = client.get(
|
92
164
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
93
165
|
:id => connector_id,
|
94
166
|
:ignore => 404,
|
@@ -96,39 +168,42 @@ module Core
|
|
96
168
|
).tap do |response|
|
97
169
|
seq_no = response['_seq_no']
|
98
170
|
primary_term = response['_primary_term']
|
99
|
-
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
100
171
|
end
|
101
|
-
if sync_in_progress
|
102
|
-
raise JobAlreadyRunningError.new(connector_id)
|
103
|
-
end
|
104
|
-
update_connector_fields(
|
105
|
-
connector_id,
|
106
|
-
{ :sync_now => false,
|
107
|
-
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
-
:last_synced => Time.now },
|
109
|
-
seq_no,
|
110
|
-
primary_term
|
111
|
-
)
|
112
172
|
|
173
|
+
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
174
|
+
end
|
175
|
+
|
176
|
+
def create_job(connector_settings:)
|
113
177
|
body = {
|
114
|
-
:
|
115
|
-
:
|
116
|
-
:
|
117
|
-
:
|
118
|
-
|
178
|
+
status: Connectors::SyncStatus::PENDING,
|
179
|
+
created_at: Time.now,
|
180
|
+
last_seen: Time.now,
|
181
|
+
connector: {
|
182
|
+
id: connector_settings.id,
|
183
|
+
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
184
|
+
index_name: connector_settings.index_name,
|
185
|
+
language: connector_settings[:language],
|
186
|
+
pipeline: connector_settings[:pipeline],
|
187
|
+
service_type: connector_settings.service_type
|
188
|
+
}
|
119
189
|
}
|
120
190
|
|
121
|
-
client.index(:
|
191
|
+
index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
|
192
|
+
|
193
|
+
return index_response if index_response['result'] == 'created'
|
194
|
+
|
195
|
+
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
122
196
|
end
|
123
197
|
|
124
198
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
199
|
return [] unless connector_filtering
|
126
200
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
201
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
202
|
+
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
128
203
|
job_filtering << {
|
129
204
|
'domain' => filtering_domain['domain'],
|
130
205
|
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
-
'advanced_snippet' =>
|
206
|
+
'advanced_snippet' => snippet['value'] || snippet,
|
132
207
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
208
|
}
|
134
209
|
end
|
@@ -145,22 +220,33 @@ module Core
|
|
145
220
|
update_connector_fields(connector_id, body)
|
146
221
|
end
|
147
222
|
|
148
|
-
def
|
149
|
-
|
223
|
+
def update_sync(job_id, metadata)
|
224
|
+
body = {
|
225
|
+
:doc => { :last_seen => Time.now }.merge(metadata)
|
226
|
+
}
|
227
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
228
|
+
end
|
229
|
+
|
230
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
+
|
233
|
+
metadata ||= {}
|
150
234
|
|
151
235
|
update_connector_fields(connector_id,
|
152
236
|
:last_sync_status => sync_status,
|
153
|
-
:last_sync_error =>
|
154
|
-
:error =>
|
237
|
+
:last_sync_error => error,
|
238
|
+
:error => error,
|
155
239
|
:last_synced => Time.now,
|
156
|
-
:last_indexed_document_count =>
|
157
|
-
:last_deleted_document_count =>
|
240
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
241
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
158
242
|
|
159
243
|
body = {
|
160
244
|
:doc => {
|
161
245
|
:status => sync_status,
|
162
|
-
:completed_at => Time.now
|
163
|
-
|
246
|
+
:completed_at => Time.now,
|
247
|
+
:last_seen => Time.now,
|
248
|
+
:error => error
|
249
|
+
}.merge(metadata)
|
164
250
|
}
|
165
251
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
166
252
|
end
|
@@ -248,12 +334,105 @@ module Core
|
|
248
334
|
:properties => {
|
249
335
|
:api_key_id => { :type => :keyword },
|
250
336
|
:configuration => { :type => :object },
|
251
|
-
:
|
337
|
+
:description => { :type => :text },
|
338
|
+
:error => { :type => :keyword },
|
339
|
+
:features => {
|
340
|
+
:properties => {
|
341
|
+
:filtering_advanced_config => { :type => :boolean },
|
342
|
+
:filtering_rules => { :type => :boolean }
|
343
|
+
}
|
344
|
+
},
|
345
|
+
:filtering => {
|
346
|
+
:properties => {
|
347
|
+
:domain => { :type => :keyword },
|
348
|
+
:active => {
|
349
|
+
:properties => {
|
350
|
+
:rules => {
|
351
|
+
:properties => {
|
352
|
+
:id => { :type => :keyword },
|
353
|
+
:policy => { :type => :keyword },
|
354
|
+
:field => { :type => :keyword },
|
355
|
+
:rule => { :type => :keyword },
|
356
|
+
:value => { :type => :keyword },
|
357
|
+
:order => { :type => :short },
|
358
|
+
:created_at => { :type => :date },
|
359
|
+
:updated_at => { :type => :date }
|
360
|
+
}
|
361
|
+
},
|
362
|
+
:advanced_snippet => {
|
363
|
+
:properties => {
|
364
|
+
:value => { :type => :object },
|
365
|
+
:created_at => { :type => :date },
|
366
|
+
:updated_at => { :type => :date }
|
367
|
+
}
|
368
|
+
},
|
369
|
+
:validation => {
|
370
|
+
:properties => {
|
371
|
+
:state => { :type => :keyword },
|
372
|
+
:errors => {
|
373
|
+
:properties => {
|
374
|
+
:ids => { :type => :keyword },
|
375
|
+
:messages => { :type => :text }
|
376
|
+
}
|
377
|
+
}
|
378
|
+
}
|
379
|
+
}
|
380
|
+
}
|
381
|
+
},
|
382
|
+
:draft => {
|
383
|
+
:properties => {
|
384
|
+
:rules => {
|
385
|
+
:properties => {
|
386
|
+
:id => { :type => :keyword },
|
387
|
+
:policy => { :type => :keyword },
|
388
|
+
:field => { :type => :keyword },
|
389
|
+
:rule => { :type => :keyword },
|
390
|
+
:value => { :type => :keyword },
|
391
|
+
:order => { :type => :short },
|
392
|
+
:created_at => { :type => :date },
|
393
|
+
:updated_at => { :type => :date }
|
394
|
+
}
|
395
|
+
},
|
396
|
+
:advanced_snippet => {
|
397
|
+
:properties => {
|
398
|
+
:value => { :type => :object },
|
399
|
+
:created_at => { :type => :date },
|
400
|
+
:updated_at => { :type => :date }
|
401
|
+
}
|
402
|
+
},
|
403
|
+
:validation => {
|
404
|
+
:properties => {
|
405
|
+
:state => { :type => :keyword },
|
406
|
+
:errors => {
|
407
|
+
:properties => {
|
408
|
+
:ids => { :type => :keyword },
|
409
|
+
:messages => { :type => :text }
|
410
|
+
}
|
411
|
+
}
|
412
|
+
}
|
413
|
+
}
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
},
|
252
418
|
:index_name => { :type => :keyword },
|
419
|
+
:is_native => { :type => :boolean },
|
420
|
+
:language => { :type => :keyword },
|
253
421
|
:last_seen => { :type => :date },
|
422
|
+
:last_sync_error => { :type => :keyword },
|
423
|
+
:last_sync_status => { :type => :keyword },
|
254
424
|
:last_synced => { :type => :date },
|
255
|
-
:
|
256
|
-
:
|
425
|
+
:last_deleted_document_count => { :type => :long },
|
426
|
+
:last_indexed_document_count => { :type => :long },
|
427
|
+
:name => { :type => :keyword },
|
428
|
+
:pipeline => {
|
429
|
+
:properties => {
|
430
|
+
:extract_binary_content => { :type => :boolean },
|
431
|
+
:name => { :type => :keyword },
|
432
|
+
:reduce_whitespace => { :type => :boolean },
|
433
|
+
:run_ml_inference => { :type => :boolean }
|
434
|
+
}
|
435
|
+
},
|
257
436
|
:scheduling => {
|
258
437
|
:properties => {
|
259
438
|
:enabled => { :type => :boolean },
|
@@ -262,9 +441,7 @@ module Core
|
|
262
441
|
},
|
263
442
|
:service_type => { :type => :keyword },
|
264
443
|
:status => { :type => :keyword },
|
265
|
-
:
|
266
|
-
:sync_now => { :type => :boolean },
|
267
|
-
:sync_status => { :type => :keyword }
|
444
|
+
:sync_now => { :type => :boolean }
|
268
445
|
}
|
269
446
|
}
|
270
447
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -275,55 +452,139 @@ module Core
|
|
275
452
|
def ensure_job_index_exists
|
276
453
|
mappings = {
|
277
454
|
:properties => {
|
278
|
-
:
|
279
|
-
:
|
455
|
+
:cancelation_requested_at => { :type => :date },
|
456
|
+
:canceled_at => { :type => :date },
|
457
|
+
:completed_at => { :type => :date },
|
458
|
+
:connector => {
|
459
|
+
:properties => {
|
460
|
+
:configuration => { :type => :object },
|
461
|
+
:filtering => {
|
462
|
+
:properties => {
|
463
|
+
:domain => { :type => :keyword },
|
464
|
+
:rules => {
|
465
|
+
:properties => {
|
466
|
+
:id => { :type => :keyword },
|
467
|
+
:policy => { :type => :keyword },
|
468
|
+
:field => { :type => :keyword },
|
469
|
+
:rule => { :type => :keyword },
|
470
|
+
:value => { :type => :keyword },
|
471
|
+
:order => { :type => :short },
|
472
|
+
:created_at => { :type => :date },
|
473
|
+
:updated_at => { :type => :date }
|
474
|
+
}
|
475
|
+
},
|
476
|
+
:advanced_snippet => {
|
477
|
+
:properties => {
|
478
|
+
:value => { :type => :object },
|
479
|
+
:created_at => { :type => :date },
|
480
|
+
:updated_at => { :type => :date }
|
481
|
+
}
|
482
|
+
},
|
483
|
+
:warnings => {
|
484
|
+
:properties => {
|
485
|
+
:ids => { :type => :keyword },
|
486
|
+
:messages => { :type => :text }
|
487
|
+
}
|
488
|
+
}
|
489
|
+
}
|
490
|
+
},
|
491
|
+
:id => { :type => :keyword },
|
492
|
+
:index_name => { :type => :keyword },
|
493
|
+
:language => { :type => :keyword },
|
494
|
+
:pipeline => {
|
495
|
+
:properties => {
|
496
|
+
:extract_binary_content => { :type => :boolean },
|
497
|
+
:name => { :type => :keyword },
|
498
|
+
:reduce_whitespace => { :type => :boolean },
|
499
|
+
:run_ml_inference => { :type => :boolean }
|
500
|
+
}
|
501
|
+
},
|
502
|
+
:service_type => { :type => :keyword }
|
503
|
+
}
|
504
|
+
},
|
505
|
+
:created_at => { :type => :date },
|
506
|
+
:deleted_document_count => { :type => :integer },
|
280
507
|
:error => { :type => :text },
|
281
|
-
:worker_hostname => { :type => :keyword },
|
282
508
|
:indexed_document_count => { :type => :integer },
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
509
|
+
:indexed_document_volume => { :type => :integer },
|
510
|
+
:last_seen => { :type => :date },
|
511
|
+
:metadata => { :type => :object },
|
512
|
+
:started_at => { :type => :date },
|
513
|
+
:status => { :type => :keyword },
|
514
|
+
:total_document_count => { :type => :integer },
|
515
|
+
:trigger_method => { :type => :keyword },
|
516
|
+
:worker_hostname => { :type => :keyword }
|
286
517
|
}
|
287
518
|
}
|
288
519
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
289
520
|
end
|
290
521
|
|
291
522
|
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
523
|
+
update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
|
524
|
+
end
|
525
|
+
|
526
|
+
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
|
527
|
+
update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
|
528
|
+
end
|
529
|
+
|
530
|
+
def document_count(index_name)
|
531
|
+
client.indices.refresh(:index => index_name)
|
532
|
+
client.count(:index => index_name)['count']
|
533
|
+
end
|
534
|
+
|
535
|
+
private
|
536
|
+
|
537
|
+
def should_update_validations?(domain_validations, filtering)
|
538
|
+
domains_present = filtering.collect { |filter| filter[:domain] }
|
539
|
+
domains_to_update = domain_validations.keys
|
540
|
+
|
541
|
+
# non-empty intersection -> domains to update present
|
542
|
+
!(domains_present & domains_to_update).empty?
|
543
|
+
end
|
544
|
+
|
545
|
+
def client
|
546
|
+
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
547
|
+
end
|
548
|
+
|
549
|
+
def get_latest_index_in_alias(alias_name, indicies)
|
550
|
+
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
551
|
+
index_version = index_versions.max # gets the largest suffix number
|
552
|
+
"#{alias_name}-v#{index_version}"
|
553
|
+
end
|
554
|
+
|
555
|
+
def update_filter_validation(filter, domain_validations)
|
556
|
+
domain = filter[:domain]
|
557
|
+
|
558
|
+
if domain_validations.key?(domain)
|
559
|
+
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
560
|
+
filter.deep_merge!(new_validation_state)
|
561
|
+
end
|
562
|
+
end
|
563
|
+
|
564
|
+
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
|
292
565
|
return if doc.empty?
|
293
566
|
update_args = {
|
294
|
-
:index =>
|
295
|
-
:id =>
|
567
|
+
:index => index,
|
568
|
+
:id => id,
|
296
569
|
:body => { :doc => doc },
|
297
570
|
:refresh => true,
|
298
571
|
:retry_on_conflict => 3
|
299
572
|
}
|
300
|
-
|
301
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
573
|
+
|
302
574
|
if seq_no && primary_term
|
303
575
|
update_args[:if_seq_no] = seq_no
|
304
576
|
update_args[:if_primary_term] = primary_term
|
305
577
|
update_args.delete(:retry_on_conflict)
|
306
578
|
end
|
579
|
+
|
307
580
|
begin
|
308
581
|
client.update(update_args)
|
309
582
|
rescue Elastic::Transport::Transport::Errors::Conflict
|
310
583
|
# VersionConflictException
|
311
584
|
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
312
|
-
raise ConnectorVersionChangedError.new(
|
585
|
+
raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
|
313
586
|
end
|
314
587
|
end
|
315
|
-
|
316
|
-
private
|
317
|
-
|
318
|
-
def client
|
319
|
-
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
|
-
end
|
321
|
-
|
322
|
-
def get_latest_index_in_alias(alias_name, indicies)
|
323
|
-
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
324
|
-
index_version = index_versions.max # gets the largest suffix number
|
325
|
-
"#{alias_name}-v#{index_version}"
|
326
|
-
end
|
327
588
|
end
|
328
589
|
end
|
329
590
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering'
|
10
|
+
require 'utility/filtering'
|
11
|
+
|
12
|
+
module Core
|
13
|
+
module Filtering
|
14
|
+
class PostProcessEngine
|
15
|
+
attr_reader :rules
|
16
|
+
|
17
|
+
def initialize(job_description)
|
18
|
+
@rules = ordered_rules(job_description.dig('connector', 'filtering'))
|
19
|
+
end
|
20
|
+
|
21
|
+
def process(document)
|
22
|
+
@rules.each do |rule|
|
23
|
+
if rule.match?(document.stringify_keys)
|
24
|
+
return PostProcessResult.new(document, rule)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
PostProcessResult.new(document, SimpleRule::DEFAULT_RULE)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def ordered_rules(job_filtering)
|
33
|
+
job_rules = Utility::Filtering.extract_filter(job_filtering)['rules']
|
34
|
+
sorted_rules = job_rules.sort_by { |rule| rule['order'] }.reject { |rule| rule['id'] == Core::Filtering::SimpleRule::DEFAULT_RULE_ID }
|
35
|
+
sorted_rules.each_with_object([]) { |rule, output| output << SimpleRule.new(rule) }
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
|
12
|
+
module Filtering
|
13
|
+
class PostProcessResult
|
14
|
+
attr_reader :document, :matching_rule
|
15
|
+
|
16
|
+
def initialize(document, matching_rule)
|
17
|
+
@document = document
|
18
|
+
@matching_rule = matching_rule
|
19
|
+
Utility::Logger.debug("Document '#{document['id']}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
|
20
|
+
end
|
21
|
+
|
22
|
+
def is_include?
|
23
|
+
matching_rule.is_include?
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|