connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +0 -4
- data/lib/app/dispatcher.rb +17 -42
- data/lib/app/preflight_check.rb +0 -11
- data/lib/connectors/base/connector.rb +14 -43
- data/lib/connectors/example/connector.rb +0 -6
- data/lib/connectors/gitlab/connector.rb +1 -6
- data/lib/connectors/mongodb/connector.rb +43 -47
- data/lib/connectors/sync_status.rb +1 -6
- data/lib/core/configuration.rb +1 -3
- data/lib/core/connector_settings.rb +16 -52
- data/lib/core/elastic_connector_actions.rb +59 -320
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
- data/lib/core/scheduler.rb +10 -40
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +16 -72
- data/lib/core.rb +0 -4
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/errors.rb +12 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +4 -11
- metadata +9 -27
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
- data/lib/connectors/tolerable_error_helper.rb +0 -43
- data/lib/core/connector_job.rb +0 -210
- data/lib/core/filtering/post_process_engine.rb +0 -39
- data/lib/core/filtering/post_process_result.rb +0 -27
- data/lib/core/filtering/simple_rule.rb +0 -141
- data/lib/core/filtering/validation_job_runner.rb +0 -53
- data/lib/core/filtering/validation_status.rb +0 -17
- data/lib/core/filtering.rb +0 -17
- data/lib/core/ingestion/es_sink.rb +0 -118
- data/lib/core/jobs/consumer.rb +0 -114
- data/lib/core/jobs/producer.rb +0 -26
- data/lib/utility/bulk_queue.rb +0 -85
- data/lib/utility/error_monitor.rb +0 -108
- data/lib/utility/filtering.rb +0 -22
@@ -19,12 +19,6 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
class JobNotCreatedError < StandardError
|
23
|
-
def initialize(connector_id, response)
|
24
|
-
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
22
|
class ConnectorVersionChangedError < StandardError
|
29
23
|
def initialize(connector_id, seq_no, primary_term)
|
30
24
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -49,17 +43,10 @@ module Core
|
|
49
43
|
end
|
50
44
|
|
51
45
|
def get_connector(connector_id)
|
52
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
53
46
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
54
47
|
end
|
55
48
|
|
56
|
-
def get_job(job_id)
|
57
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
-
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
-
end
|
60
|
-
|
61
49
|
def connectors_meta
|
62
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
63
50
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
64
51
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
65
52
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -78,19 +65,6 @@ module Core
|
|
78
65
|
)
|
79
66
|
end
|
80
67
|
|
81
|
-
def search_jobs(query, page_size, offset)
|
82
|
-
client.search(
|
83
|
-
:index => Utility::Constants::JOB_INDEX,
|
84
|
-
:ignore => 404,
|
85
|
-
:body => {
|
86
|
-
:size => page_size,
|
87
|
-
:from => offset,
|
88
|
-
:query => query,
|
89
|
-
:sort => ['created_at']
|
90
|
-
}
|
91
|
-
)
|
92
|
-
end
|
93
|
-
|
94
68
|
def update_connector_configuration(connector_id, configuration)
|
95
69
|
update_connector_fields(connector_id, :configuration => configuration)
|
96
70
|
end
|
@@ -110,57 +84,11 @@ module Core
|
|
110
84
|
update_connector_configuration(connector_id, payload)
|
111
85
|
end
|
112
86
|
|
113
|
-
def
|
114
|
-
return if filter_validation_results.empty?
|
115
|
-
|
116
|
-
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
-
|
118
|
-
case filtering
|
119
|
-
when Hash
|
120
|
-
update_filter_validation(filtering, filter_validation_results)
|
121
|
-
when Array
|
122
|
-
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
-
|
124
|
-
filtering.each do |filter|
|
125
|
-
update_filter_validation(filter, filter_validation_results)
|
126
|
-
end
|
127
|
-
else
|
128
|
-
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
-
return
|
130
|
-
end
|
131
|
-
|
132
|
-
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
-
end
|
134
|
-
|
135
|
-
def update_connector_sync_now(connector_id, sync_now)
|
136
|
-
doc = connector_with_concurrency_control(connector_id)
|
137
|
-
|
138
|
-
body = { sync_now: sync_now, last_synced: Time.now }
|
139
|
-
|
140
|
-
update_connector_fields(
|
141
|
-
connector_id,
|
142
|
-
body,
|
143
|
-
doc[:seq_no],
|
144
|
-
doc[:primary_term]
|
145
|
-
)
|
146
|
-
end
|
147
|
-
|
148
|
-
def update_connector_last_sync_status(connector_id, last_sync_status)
|
149
|
-
doc = connector_with_concurrency_control(connector_id)
|
150
|
-
|
151
|
-
update_connector_fields(
|
152
|
-
connector_id,
|
153
|
-
{ last_sync_status: last_sync_status },
|
154
|
-
doc[:seq_no],
|
155
|
-
doc[:primary_term]
|
156
|
-
)
|
157
|
-
end
|
158
|
-
|
159
|
-
def connector_with_concurrency_control(connector_id)
|
87
|
+
def claim_job(connector_id)
|
160
88
|
seq_no = nil
|
161
89
|
primary_term = nil
|
162
|
-
|
163
|
-
|
90
|
+
sync_in_progress = false
|
91
|
+
connector_record = client.get(
|
164
92
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
165
93
|
:id => connector_id,
|
166
94
|
:ignore => 404,
|
@@ -168,42 +96,39 @@ module Core
|
|
168
96
|
).tap do |response|
|
169
97
|
seq_no = response['_seq_no']
|
170
98
|
primary_term = response['_primary_term']
|
99
|
+
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
171
100
|
end
|
101
|
+
if sync_in_progress
|
102
|
+
raise JobAlreadyRunningError.new(connector_id)
|
103
|
+
end
|
104
|
+
update_connector_fields(
|
105
|
+
connector_id,
|
106
|
+
{ :sync_now => false,
|
107
|
+
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
+
:last_synced => Time.now },
|
109
|
+
seq_no,
|
110
|
+
primary_term
|
111
|
+
)
|
172
112
|
|
173
|
-
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
174
|
-
end
|
175
|
-
|
176
|
-
def create_job(connector_settings:)
|
177
113
|
body = {
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
184
|
-
index_name: connector_settings.index_name,
|
185
|
-
language: connector_settings[:language],
|
186
|
-
pipeline: connector_settings[:pipeline],
|
187
|
-
service_type: connector_settings.service_type
|
188
|
-
}
|
114
|
+
:connector_id => connector_id,
|
115
|
+
:status => Connectors::SyncStatus::IN_PROGRESS,
|
116
|
+
:worker_hostname => Socket.gethostname,
|
117
|
+
:created_at => Time.now,
|
118
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
189
119
|
}
|
190
120
|
|
191
|
-
|
192
|
-
|
193
|
-
return index_response if index_response['result'] == 'created'
|
194
|
-
|
195
|
-
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
121
|
+
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
196
122
|
end
|
197
123
|
|
198
124
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
199
125
|
return [] unless connector_filtering
|
200
126
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
201
127
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
202
|
-
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
203
128
|
job_filtering << {
|
204
129
|
'domain' => filtering_domain['domain'],
|
205
130
|
'rules' => filtering_domain.dig('active', 'rules'),
|
206
|
-
'advanced_snippet' =>
|
131
|
+
'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
|
207
132
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
208
133
|
}
|
209
134
|
end
|
@@ -220,33 +145,22 @@ module Core
|
|
220
145
|
update_connector_fields(connector_id, body)
|
221
146
|
end
|
222
147
|
|
223
|
-
def
|
224
|
-
|
225
|
-
:doc => { :last_seen => Time.now }.merge(metadata)
|
226
|
-
}
|
227
|
-
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
228
|
-
end
|
229
|
-
|
230
|
-
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
-
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
-
|
233
|
-
metadata ||= {}
|
148
|
+
def complete_sync(connector_id, job_id, status)
|
149
|
+
sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
234
150
|
|
235
151
|
update_connector_fields(connector_id,
|
236
152
|
:last_sync_status => sync_status,
|
237
|
-
:last_sync_error => error,
|
238
|
-
:error => error,
|
153
|
+
:last_sync_error => status[:error],
|
154
|
+
:error => status[:error],
|
239
155
|
:last_synced => Time.now,
|
240
|
-
:last_indexed_document_count =>
|
241
|
-
:last_deleted_document_count =>
|
156
|
+
:last_indexed_document_count => status[:indexed_document_count],
|
157
|
+
:last_deleted_document_count => status[:deleted_document_count])
|
242
158
|
|
243
159
|
body = {
|
244
160
|
:doc => {
|
245
161
|
:status => sync_status,
|
246
|
-
:completed_at => Time.now
|
247
|
-
|
248
|
-
:error => error
|
249
|
-
}.merge(metadata)
|
162
|
+
:completed_at => Time.now
|
163
|
+
}.merge(status)
|
250
164
|
}
|
251
165
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
252
166
|
end
|
@@ -334,105 +248,12 @@ module Core
|
|
334
248
|
:properties => {
|
335
249
|
:api_key_id => { :type => :keyword },
|
336
250
|
:configuration => { :type => :object },
|
337
|
-
:
|
338
|
-
:error => { :type => :keyword },
|
339
|
-
:features => {
|
340
|
-
:properties => {
|
341
|
-
:filtering_advanced_config => { :type => :boolean },
|
342
|
-
:filtering_rules => { :type => :boolean }
|
343
|
-
}
|
344
|
-
},
|
345
|
-
:filtering => {
|
346
|
-
:properties => {
|
347
|
-
:domain => { :type => :keyword },
|
348
|
-
:active => {
|
349
|
-
:properties => {
|
350
|
-
:rules => {
|
351
|
-
:properties => {
|
352
|
-
:id => { :type => :keyword },
|
353
|
-
:policy => { :type => :keyword },
|
354
|
-
:field => { :type => :keyword },
|
355
|
-
:rule => { :type => :keyword },
|
356
|
-
:value => { :type => :keyword },
|
357
|
-
:order => { :type => :short },
|
358
|
-
:created_at => { :type => :date },
|
359
|
-
:updated_at => { :type => :date }
|
360
|
-
}
|
361
|
-
},
|
362
|
-
:advanced_snippet => {
|
363
|
-
:properties => {
|
364
|
-
:value => { :type => :object },
|
365
|
-
:created_at => { :type => :date },
|
366
|
-
:updated_at => { :type => :date }
|
367
|
-
}
|
368
|
-
},
|
369
|
-
:validation => {
|
370
|
-
:properties => {
|
371
|
-
:state => { :type => :keyword },
|
372
|
-
:errors => {
|
373
|
-
:properties => {
|
374
|
-
:ids => { :type => :keyword },
|
375
|
-
:messages => { :type => :text }
|
376
|
-
}
|
377
|
-
}
|
378
|
-
}
|
379
|
-
}
|
380
|
-
}
|
381
|
-
},
|
382
|
-
:draft => {
|
383
|
-
:properties => {
|
384
|
-
:rules => {
|
385
|
-
:properties => {
|
386
|
-
:id => { :type => :keyword },
|
387
|
-
:policy => { :type => :keyword },
|
388
|
-
:field => { :type => :keyword },
|
389
|
-
:rule => { :type => :keyword },
|
390
|
-
:value => { :type => :keyword },
|
391
|
-
:order => { :type => :short },
|
392
|
-
:created_at => { :type => :date },
|
393
|
-
:updated_at => { :type => :date }
|
394
|
-
}
|
395
|
-
},
|
396
|
-
:advanced_snippet => {
|
397
|
-
:properties => {
|
398
|
-
:value => { :type => :object },
|
399
|
-
:created_at => { :type => :date },
|
400
|
-
:updated_at => { :type => :date }
|
401
|
-
}
|
402
|
-
},
|
403
|
-
:validation => {
|
404
|
-
:properties => {
|
405
|
-
:state => { :type => :keyword },
|
406
|
-
:errors => {
|
407
|
-
:properties => {
|
408
|
-
:ids => { :type => :keyword },
|
409
|
-
:messages => { :type => :text }
|
410
|
-
}
|
411
|
-
}
|
412
|
-
}
|
413
|
-
}
|
414
|
-
}
|
415
|
-
}
|
416
|
-
}
|
417
|
-
},
|
251
|
+
:error => { :type => :text },
|
418
252
|
:index_name => { :type => :keyword },
|
419
|
-
:is_native => { :type => :boolean },
|
420
|
-
:language => { :type => :keyword },
|
421
253
|
:last_seen => { :type => :date },
|
422
|
-
:last_sync_error => { :type => :keyword },
|
423
|
-
:last_sync_status => { :type => :keyword },
|
424
254
|
:last_synced => { :type => :date },
|
425
|
-
:
|
426
|
-
:
|
427
|
-
:name => { :type => :keyword },
|
428
|
-
:pipeline => {
|
429
|
-
:properties => {
|
430
|
-
:extract_binary_content => { :type => :boolean },
|
431
|
-
:name => { :type => :keyword },
|
432
|
-
:reduce_whitespace => { :type => :boolean },
|
433
|
-
:run_ml_inference => { :type => :boolean }
|
434
|
-
}
|
435
|
-
},
|
255
|
+
:last_indexed_document_count => { :type => :integer },
|
256
|
+
:last_deleted_document_count => { :type => :integer },
|
436
257
|
:scheduling => {
|
437
258
|
:properties => {
|
438
259
|
:enabled => { :type => :boolean },
|
@@ -441,7 +262,9 @@ module Core
|
|
441
262
|
},
|
442
263
|
:service_type => { :type => :keyword },
|
443
264
|
:status => { :type => :keyword },
|
444
|
-
:
|
265
|
+
:sync_error => { :type => :text },
|
266
|
+
:sync_now => { :type => :boolean },
|
267
|
+
:sync_status => { :type => :keyword }
|
445
268
|
}
|
446
269
|
}
|
447
270
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -452,139 +275,55 @@ module Core
|
|
452
275
|
def ensure_job_index_exists
|
453
276
|
mappings = {
|
454
277
|
:properties => {
|
455
|
-
:
|
456
|
-
:
|
457
|
-
:completed_at => { :type => :date },
|
458
|
-
:connector => {
|
459
|
-
:properties => {
|
460
|
-
:configuration => { :type => :object },
|
461
|
-
:filtering => {
|
462
|
-
:properties => {
|
463
|
-
:domain => { :type => :keyword },
|
464
|
-
:rules => {
|
465
|
-
:properties => {
|
466
|
-
:id => { :type => :keyword },
|
467
|
-
:policy => { :type => :keyword },
|
468
|
-
:field => { :type => :keyword },
|
469
|
-
:rule => { :type => :keyword },
|
470
|
-
:value => { :type => :keyword },
|
471
|
-
:order => { :type => :short },
|
472
|
-
:created_at => { :type => :date },
|
473
|
-
:updated_at => { :type => :date }
|
474
|
-
}
|
475
|
-
},
|
476
|
-
:advanced_snippet => {
|
477
|
-
:properties => {
|
478
|
-
:value => { :type => :object },
|
479
|
-
:created_at => { :type => :date },
|
480
|
-
:updated_at => { :type => :date }
|
481
|
-
}
|
482
|
-
},
|
483
|
-
:warnings => {
|
484
|
-
:properties => {
|
485
|
-
:ids => { :type => :keyword },
|
486
|
-
:messages => { :type => :text }
|
487
|
-
}
|
488
|
-
}
|
489
|
-
}
|
490
|
-
},
|
491
|
-
:id => { :type => :keyword },
|
492
|
-
:index_name => { :type => :keyword },
|
493
|
-
:language => { :type => :keyword },
|
494
|
-
:pipeline => {
|
495
|
-
:properties => {
|
496
|
-
:extract_binary_content => { :type => :boolean },
|
497
|
-
:name => { :type => :keyword },
|
498
|
-
:reduce_whitespace => { :type => :boolean },
|
499
|
-
:run_ml_inference => { :type => :boolean }
|
500
|
-
}
|
501
|
-
},
|
502
|
-
:service_type => { :type => :keyword }
|
503
|
-
}
|
504
|
-
},
|
505
|
-
:created_at => { :type => :date },
|
506
|
-
:deleted_document_count => { :type => :integer },
|
278
|
+
:connector_id => { :type => :keyword },
|
279
|
+
:status => { :type => :keyword },
|
507
280
|
:error => { :type => :text },
|
281
|
+
:worker_hostname => { :type => :keyword },
|
508
282
|
:indexed_document_count => { :type => :integer },
|
509
|
-
:
|
510
|
-
:
|
511
|
-
:
|
512
|
-
:started_at => { :type => :date },
|
513
|
-
:status => { :type => :keyword },
|
514
|
-
:total_document_count => { :type => :integer },
|
515
|
-
:trigger_method => { :type => :keyword },
|
516
|
-
:worker_hostname => { :type => :keyword }
|
283
|
+
:deleted_document_count => { :type => :integer },
|
284
|
+
:created_at => { :type => :date },
|
285
|
+
:completed_at => { :type => :date }
|
517
286
|
}
|
518
287
|
}
|
519
288
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
520
289
|
end
|
521
290
|
|
522
291
|
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
523
|
-
update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
|
524
|
-
end
|
525
|
-
|
526
|
-
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
|
527
|
-
update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
|
528
|
-
end
|
529
|
-
|
530
|
-
def document_count(index_name)
|
531
|
-
client.indices.refresh(:index => index_name)
|
532
|
-
client.count(:index => index_name)['count']
|
533
|
-
end
|
534
|
-
|
535
|
-
private
|
536
|
-
|
537
|
-
def should_update_validations?(domain_validations, filtering)
|
538
|
-
domains_present = filtering.collect { |filter| filter[:domain] }
|
539
|
-
domains_to_update = domain_validations.keys
|
540
|
-
|
541
|
-
# non-empty intersection -> domains to update present
|
542
|
-
!(domains_present & domains_to_update).empty?
|
543
|
-
end
|
544
|
-
|
545
|
-
def client
|
546
|
-
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
547
|
-
end
|
548
|
-
|
549
|
-
def get_latest_index_in_alias(alias_name, indicies)
|
550
|
-
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
551
|
-
index_version = index_versions.max # gets the largest suffix number
|
552
|
-
"#{alias_name}-v#{index_version}"
|
553
|
-
end
|
554
|
-
|
555
|
-
def update_filter_validation(filter, domain_validations)
|
556
|
-
domain = filter[:domain]
|
557
|
-
|
558
|
-
if domain_validations.key?(domain)
|
559
|
-
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
560
|
-
filter.deep_merge!(new_validation_state)
|
561
|
-
end
|
562
|
-
end
|
563
|
-
|
564
|
-
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
|
565
292
|
return if doc.empty?
|
566
293
|
update_args = {
|
567
|
-
:index =>
|
568
|
-
:id =>
|
294
|
+
:index => Utility::Constants::CONNECTORS_INDEX,
|
295
|
+
:id => connector_id,
|
569
296
|
:body => { :doc => doc },
|
570
297
|
:refresh => true,
|
571
298
|
:retry_on_conflict => 3
|
572
299
|
}
|
573
|
-
|
300
|
+
# seq_no and primary_term are used for optimistic concurrency control
|
301
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
574
302
|
if seq_no && primary_term
|
575
303
|
update_args[:if_seq_no] = seq_no
|
576
304
|
update_args[:if_primary_term] = primary_term
|
577
305
|
update_args.delete(:retry_on_conflict)
|
578
306
|
end
|
579
|
-
|
580
307
|
begin
|
581
308
|
client.update(update_args)
|
582
309
|
rescue Elastic::Transport::Transport::Errors::Conflict
|
583
310
|
# VersionConflictException
|
584
311
|
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
585
|
-
raise ConnectorVersionChangedError.new(
|
312
|
+
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
586
313
|
end
|
587
314
|
end
|
315
|
+
|
316
|
+
private
|
317
|
+
|
318
|
+
def client
|
319
|
+
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
|
+
end
|
321
|
+
|
322
|
+
def get_latest_index_in_alias(alias_name, indicies)
|
323
|
+
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
324
|
+
index_version = index_versions.max # gets the largest suffix number
|
325
|
+
"#{alias_name}-v#{index_version}"
|
326
|
+
end
|
588
327
|
end
|
589
328
|
end
|
590
329
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module OutputSink
    # Abstract contract for output sinks. Every operation raises
    # RuntimeError('not implemented') until a subclass overrides it.
    class BaseSink
      # Accepts a single document.
      #
      # @raise [RuntimeError] always; subclasses are expected to override
      def ingest(_document)
        raise 'not implemented'
      end

      # Accepts a collection of documents.
      #
      # @raise [RuntimeError] always; subclasses are expected to override
      def ingest_multiple(_documents)
        raise 'not implemented'
      end

      # Removes a single document by id.
      #
      # @raise [RuntimeError] always; subclasses are expected to override
      def delete(_id)
        raise 'not implemented'
      end

      # Removes several documents by id.
      #
      # @raise [RuntimeError] always; subclasses are expected to override
      def delete_multiple(_ids)
        raise 'not implemented'
      end

      # Flushes pending operations.
      #
      # The concrete sinks and CombinedSink call this as +flush(size: n)+. The
      # keyword used to be declared as +_size:+, so a sink that did not override
      # flush failed with ArgumentError (unknown keyword: :size) instead of the
      # intended 'not implemented'. Any keywords are accepted now, which covers
      # both +size:+ and the legacy +_size:+ spelling.
      #
      # @raise [RuntimeError] always; subclasses are expected to override
      def flush(**_options)
        raise 'not implemented'
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink/base_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
  # Sink that forwards every operation to each of the sinks it was built with,
  # in the order they were given.
  class CombinedSink < Core::OutputSink::BaseSink
    # @param sinks [Array] sinks that receive every forwarded call
    def initialize(sinks = [])
      @sinks = sinks
    end

    # Forwards one document to every wrapped sink.
    def ingest(document)
      fan_out { |target| target.ingest(document) }
    end

    # Asks every wrapped sink to flush, passing the size through unchanged.
    def flush(size: nil)
      fan_out { |target| target.flush(size: size) }
    end

    # Forwards a collection of documents to every wrapped sink.
    def ingest_multiple(documents)
      fan_out { |target| target.ingest_multiple(documents) }
    end

    # Forwards a single deletion to every wrapped sink.
    def delete(id)
      fan_out { |target| target.delete(id) }
    end

    # Forwards a batch of deletions to every wrapped sink.
    def delete_multiple(ids)
      fan_out { |target| target.delete_multiple(ids) }
    end

    private

    # Runs the given operation once per wrapped sink and returns the sink list,
    # exactly as a direct +@sinks.each+ would.
    def fan_out(&operation)
      @sinks.each(&operation)
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
  # Sink that writes every operation to standard output instead of storing it.
  # Each operation prints a delimited header followed by its payload.
  class ConsoleSink < Core::OutputSink::BaseSink
    # Prints a single document.
    def ingest(document)
      announce('Got a single document:', document)
    end

    # Prints the requested flush size; nothing is actually buffered here.
    def flush(size: nil)
      announce('Flushing', "Flush size: #{size}")
    end

    # Prints a collection of documents.
    def ingest_multiple(documents)
      announce('Got multiple documents:', documents)
    end

    # Prints the id being deleted (once in the header, once as the payload).
    def delete(id)
      announce("Deleting single id: #{id}", id)
    end

    # Prints the ids being deleted (once in the header, once as the payload).
    def delete_multiple(ids)
      announce("Deleting several ids: #{ids}", ids)
    end

    private

    # Prints the framed header, then the payload on the following line(s).
    def announce(header, payload)
      print_header(header)
      puts payload
    end

    # Prints the horizontal rule used above and below a header.
    def print_delim
      puts '----------------------------------------------------'
    end

    # Prints the header text framed by two delimiter lines.
    def print_header(header)
      print_delim
      puts header
      print_delim
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/numeric/time'
|
10
|
+
require 'app/config'
|
11
|
+
require 'core/output_sink/base_sink'
|
12
|
+
require 'utility/es_client'
|
13
|
+
require 'utility/logger'
|
14
|
+
|
15
|
+
module Core::OutputSink
  # Sink that queues index/delete operations in memory and sends them to
  # Elasticsearch in bulk requests.
  class EsSink < Core::OutputSink::BaseSink
    # @param index_name name of the index every queued operation targets
    # @param request_pipeline value passed as the :pipeline option of each bulk request
    # @param flush_threshold queue length at which a flush is triggered automatically
    def initialize(index_name, request_pipeline, flush_threshold = 50)
      super()
      @client = Utility::EsClient.new(App::Config[:elasticsearch])
      @index_name = index_name
      @request_pipeline = request_pipeline
      @operation_queue = []
      @flush_threshold = flush_threshold
    end

    # Queues an index operation for the document, keyed by document[:id], and
    # flushes once the queue reaches the threshold. Blank documents are ignored.
    def ingest(document)
      return if document.blank?

      @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
      flush if ready_to_flush?
    end

    # Queues a delete operation for the id and flushes once the queue reaches
    # the threshold. A nil id is ignored.
    def delete(doc_id)
      return if doc_id.nil?

      @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
      flush if ready_to_flush?
    end

    # Sends everything that is queued, in batches of at most +size+ operations
    # (defaulting to the flush threshold).
    #
    # Batches are taken from the FRONT of the queue so operations reach
    # Elasticsearch in the order they were enqueued. Taking them from the back
    # sent the newest batch first whenever the queue was longer than one batch,
    # which could invert an index/delete pair for the same id.
    #
    # @param size [Integer, nil] batch size; non-positive values fall back to
    #   the threshold rather than looping forever on empty batches
    def flush(size: nil)
      batch_size = effective_batch_size(size)
      send_data(@operation_queue.shift(batch_size)) until @operation_queue.empty?
    end

    # Queues every document through #ingest. A nil collection is treated as
    # empty, matching the nil-tolerant log line.
    def ingest_multiple(documents)
      Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
      (documents || []).each { |doc| ingest(doc) }
    end

    # Queues every id through #delete. A nil collection is treated as empty,
    # matching the nil-tolerant log line.
    def delete_multiple(ids)
      Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
      (ids || []).each { |id| delete(id) }
    end

    private

    attr_accessor :index_name

    # Picks the first positive value among the requested size and the
    # configured threshold; if neither is positive, the whole queue is sent as
    # one batch so that #flush always makes progress.
    def effective_batch_size(requested)
      [requested, @flush_threshold].each do |candidate|
        count = candidate.to_i
        return count if count.positive?
      end
      @operation_queue.size
    end

    # Issues one bulk request for the given operations and logs how many were sent.
    # NOTE(review): the bulk response is not inspected for per-item errors — confirm
    # whether Utility::EsClient raises on partial failures.
    def send_data(ops)
      return if ops.empty?

      @client.bulk(:body => ops, :pipeline => @request_pipeline)
      Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
    end

    # True once the queue holds at least +flush_threshold+ operations.
    def ready_to_flush?
      @operation_queue.size >= @flush_threshold
    end
  end
end
|