connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/app.rb +0 -4
- data/lib/app/dispatcher.rb +17 -42
- data/lib/app/preflight_check.rb +0 -11
- data/lib/connectors/base/connector.rb +14 -43
- data/lib/connectors/example/connector.rb +0 -6
- data/lib/connectors/gitlab/connector.rb +1 -6
- data/lib/connectors/mongodb/connector.rb +43 -47
- data/lib/connectors/sync_status.rb +1 -6
- data/lib/core/configuration.rb +1 -3
- data/lib/core/connector_settings.rb +16 -52
- data/lib/core/elastic_connector_actions.rb +59 -320
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
- data/lib/core/scheduler.rb +10 -40
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +16 -72
- data/lib/core.rb +0 -4
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/errors.rb +12 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +4 -11
- metadata +9 -27
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
- data/lib/connectors/tolerable_error_helper.rb +0 -43
- data/lib/core/connector_job.rb +0 -210
- data/lib/core/filtering/post_process_engine.rb +0 -39
- data/lib/core/filtering/post_process_result.rb +0 -27
- data/lib/core/filtering/simple_rule.rb +0 -141
- data/lib/core/filtering/validation_job_runner.rb +0 -53
- data/lib/core/filtering/validation_status.rb +0 -17
- data/lib/core/filtering.rb +0 -17
- data/lib/core/ingestion/es_sink.rb +0 -118
- data/lib/core/jobs/consumer.rb +0 -114
- data/lib/core/jobs/producer.rb +0 -26
- data/lib/utility/bulk_queue.rb +0 -85
- data/lib/utility/error_monitor.rb +0 -108
- data/lib/utility/filtering.rb +0 -22
@@ -19,12 +19,6 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
class JobNotCreatedError < StandardError
|
23
|
-
def initialize(connector_id, response)
|
24
|
-
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
22
|
class ConnectorVersionChangedError < StandardError
|
29
23
|
def initialize(connector_id, seq_no, primary_term)
|
30
24
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -49,17 +43,10 @@ module Core
|
|
49
43
|
end
|
50
44
|
|
51
45
|
def get_connector(connector_id)
|
52
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
53
46
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
54
47
|
end
|
55
48
|
|
56
|
-
def get_job(job_id)
|
57
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
-
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
-
end
|
60
|
-
|
61
49
|
def connectors_meta
|
62
|
-
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
63
50
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
64
51
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
65
52
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -78,19 +65,6 @@ module Core
|
|
78
65
|
)
|
79
66
|
end
|
80
67
|
|
81
|
-
def search_jobs(query, page_size, offset)
|
82
|
-
client.search(
|
83
|
-
:index => Utility::Constants::JOB_INDEX,
|
84
|
-
:ignore => 404,
|
85
|
-
:body => {
|
86
|
-
:size => page_size,
|
87
|
-
:from => offset,
|
88
|
-
:query => query,
|
89
|
-
:sort => ['created_at']
|
90
|
-
}
|
91
|
-
)
|
92
|
-
end
|
93
|
-
|
94
68
|
def update_connector_configuration(connector_id, configuration)
|
95
69
|
update_connector_fields(connector_id, :configuration => configuration)
|
96
70
|
end
|
@@ -110,57 +84,11 @@ module Core
|
|
110
84
|
update_connector_configuration(connector_id, payload)
|
111
85
|
end
|
112
86
|
|
113
|
-
def
|
114
|
-
return if filter_validation_results.empty?
|
115
|
-
|
116
|
-
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
-
|
118
|
-
case filtering
|
119
|
-
when Hash
|
120
|
-
update_filter_validation(filtering, filter_validation_results)
|
121
|
-
when Array
|
122
|
-
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
-
|
124
|
-
filtering.each do |filter|
|
125
|
-
update_filter_validation(filter, filter_validation_results)
|
126
|
-
end
|
127
|
-
else
|
128
|
-
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
-
return
|
130
|
-
end
|
131
|
-
|
132
|
-
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
-
end
|
134
|
-
|
135
|
-
def update_connector_sync_now(connector_id, sync_now)
|
136
|
-
doc = connector_with_concurrency_control(connector_id)
|
137
|
-
|
138
|
-
body = { sync_now: sync_now, last_synced: Time.now }
|
139
|
-
|
140
|
-
update_connector_fields(
|
141
|
-
connector_id,
|
142
|
-
body,
|
143
|
-
doc[:seq_no],
|
144
|
-
doc[:primary_term]
|
145
|
-
)
|
146
|
-
end
|
147
|
-
|
148
|
-
def update_connector_last_sync_status(connector_id, last_sync_status)
|
149
|
-
doc = connector_with_concurrency_control(connector_id)
|
150
|
-
|
151
|
-
update_connector_fields(
|
152
|
-
connector_id,
|
153
|
-
{ last_sync_status: last_sync_status },
|
154
|
-
doc[:seq_no],
|
155
|
-
doc[:primary_term]
|
156
|
-
)
|
157
|
-
end
|
158
|
-
|
159
|
-
def connector_with_concurrency_control(connector_id)
|
87
|
+
def claim_job(connector_id)
|
160
88
|
seq_no = nil
|
161
89
|
primary_term = nil
|
162
|
-
|
163
|
-
|
90
|
+
sync_in_progress = false
|
91
|
+
connector_record = client.get(
|
164
92
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
165
93
|
:id => connector_id,
|
166
94
|
:ignore => 404,
|
@@ -168,42 +96,39 @@ module Core
|
|
168
96
|
).tap do |response|
|
169
97
|
seq_no = response['_seq_no']
|
170
98
|
primary_term = response['_primary_term']
|
99
|
+
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
171
100
|
end
|
101
|
+
if sync_in_progress
|
102
|
+
raise JobAlreadyRunningError.new(connector_id)
|
103
|
+
end
|
104
|
+
update_connector_fields(
|
105
|
+
connector_id,
|
106
|
+
{ :sync_now => false,
|
107
|
+
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
+
:last_synced => Time.now },
|
109
|
+
seq_no,
|
110
|
+
primary_term
|
111
|
+
)
|
172
112
|
|
173
|
-
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
174
|
-
end
|
175
|
-
|
176
|
-
def create_job(connector_settings:)
|
177
113
|
body = {
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
184
|
-
index_name: connector_settings.index_name,
|
185
|
-
language: connector_settings[:language],
|
186
|
-
pipeline: connector_settings[:pipeline],
|
187
|
-
service_type: connector_settings.service_type
|
188
|
-
}
|
114
|
+
:connector_id => connector_id,
|
115
|
+
:status => Connectors::SyncStatus::IN_PROGRESS,
|
116
|
+
:worker_hostname => Socket.gethostname,
|
117
|
+
:created_at => Time.now,
|
118
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
189
119
|
}
|
190
120
|
|
191
|
-
|
192
|
-
|
193
|
-
return index_response if index_response['result'] == 'created'
|
194
|
-
|
195
|
-
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
121
|
+
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
196
122
|
end
|
197
123
|
|
198
124
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
199
125
|
return [] unless connector_filtering
|
200
126
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
201
127
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
202
|
-
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
203
128
|
job_filtering << {
|
204
129
|
'domain' => filtering_domain['domain'],
|
205
130
|
'rules' => filtering_domain.dig('active', 'rules'),
|
206
|
-
'advanced_snippet' =>
|
131
|
+
'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
|
207
132
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
208
133
|
}
|
209
134
|
end
|
@@ -220,33 +145,22 @@ module Core
|
|
220
145
|
update_connector_fields(connector_id, body)
|
221
146
|
end
|
222
147
|
|
223
|
-
def
|
224
|
-
|
225
|
-
:doc => { :last_seen => Time.now }.merge(metadata)
|
226
|
-
}
|
227
|
-
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
228
|
-
end
|
229
|
-
|
230
|
-
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
-
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
-
|
233
|
-
metadata ||= {}
|
148
|
+
def complete_sync(connector_id, job_id, status)
|
149
|
+
sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
234
150
|
|
235
151
|
update_connector_fields(connector_id,
|
236
152
|
:last_sync_status => sync_status,
|
237
|
-
:last_sync_error => error,
|
238
|
-
:error => error,
|
153
|
+
:last_sync_error => status[:error],
|
154
|
+
:error => status[:error],
|
239
155
|
:last_synced => Time.now,
|
240
|
-
:last_indexed_document_count =>
|
241
|
-
:last_deleted_document_count =>
|
156
|
+
:last_indexed_document_count => status[:indexed_document_count],
|
157
|
+
:last_deleted_document_count => status[:deleted_document_count])
|
242
158
|
|
243
159
|
body = {
|
244
160
|
:doc => {
|
245
161
|
:status => sync_status,
|
246
|
-
:completed_at => Time.now
|
247
|
-
|
248
|
-
:error => error
|
249
|
-
}.merge(metadata)
|
162
|
+
:completed_at => Time.now
|
163
|
+
}.merge(status)
|
250
164
|
}
|
251
165
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
252
166
|
end
|
@@ -334,105 +248,12 @@ module Core
|
|
334
248
|
:properties => {
|
335
249
|
:api_key_id => { :type => :keyword },
|
336
250
|
:configuration => { :type => :object },
|
337
|
-
:
|
338
|
-
:error => { :type => :keyword },
|
339
|
-
:features => {
|
340
|
-
:properties => {
|
341
|
-
:filtering_advanced_config => { :type => :boolean },
|
342
|
-
:filtering_rules => { :type => :boolean }
|
343
|
-
}
|
344
|
-
},
|
345
|
-
:filtering => {
|
346
|
-
:properties => {
|
347
|
-
:domain => { :type => :keyword },
|
348
|
-
:active => {
|
349
|
-
:properties => {
|
350
|
-
:rules => {
|
351
|
-
:properties => {
|
352
|
-
:id => { :type => :keyword },
|
353
|
-
:policy => { :type => :keyword },
|
354
|
-
:field => { :type => :keyword },
|
355
|
-
:rule => { :type => :keyword },
|
356
|
-
:value => { :type => :keyword },
|
357
|
-
:order => { :type => :short },
|
358
|
-
:created_at => { :type => :date },
|
359
|
-
:updated_at => { :type => :date }
|
360
|
-
}
|
361
|
-
},
|
362
|
-
:advanced_snippet => {
|
363
|
-
:properties => {
|
364
|
-
:value => { :type => :object },
|
365
|
-
:created_at => { :type => :date },
|
366
|
-
:updated_at => { :type => :date }
|
367
|
-
}
|
368
|
-
},
|
369
|
-
:validation => {
|
370
|
-
:properties => {
|
371
|
-
:state => { :type => :keyword },
|
372
|
-
:errors => {
|
373
|
-
:properties => {
|
374
|
-
:ids => { :type => :keyword },
|
375
|
-
:messages => { :type => :text }
|
376
|
-
}
|
377
|
-
}
|
378
|
-
}
|
379
|
-
}
|
380
|
-
}
|
381
|
-
},
|
382
|
-
:draft => {
|
383
|
-
:properties => {
|
384
|
-
:rules => {
|
385
|
-
:properties => {
|
386
|
-
:id => { :type => :keyword },
|
387
|
-
:policy => { :type => :keyword },
|
388
|
-
:field => { :type => :keyword },
|
389
|
-
:rule => { :type => :keyword },
|
390
|
-
:value => { :type => :keyword },
|
391
|
-
:order => { :type => :short },
|
392
|
-
:created_at => { :type => :date },
|
393
|
-
:updated_at => { :type => :date }
|
394
|
-
}
|
395
|
-
},
|
396
|
-
:advanced_snippet => {
|
397
|
-
:properties => {
|
398
|
-
:value => { :type => :object },
|
399
|
-
:created_at => { :type => :date },
|
400
|
-
:updated_at => { :type => :date }
|
401
|
-
}
|
402
|
-
},
|
403
|
-
:validation => {
|
404
|
-
:properties => {
|
405
|
-
:state => { :type => :keyword },
|
406
|
-
:errors => {
|
407
|
-
:properties => {
|
408
|
-
:ids => { :type => :keyword },
|
409
|
-
:messages => { :type => :text }
|
410
|
-
}
|
411
|
-
}
|
412
|
-
}
|
413
|
-
}
|
414
|
-
}
|
415
|
-
}
|
416
|
-
}
|
417
|
-
},
|
251
|
+
:error => { :type => :text },
|
418
252
|
:index_name => { :type => :keyword },
|
419
|
-
:is_native => { :type => :boolean },
|
420
|
-
:language => { :type => :keyword },
|
421
253
|
:last_seen => { :type => :date },
|
422
|
-
:last_sync_error => { :type => :keyword },
|
423
|
-
:last_sync_status => { :type => :keyword },
|
424
254
|
:last_synced => { :type => :date },
|
425
|
-
:
|
426
|
-
:
|
427
|
-
:name => { :type => :keyword },
|
428
|
-
:pipeline => {
|
429
|
-
:properties => {
|
430
|
-
:extract_binary_content => { :type => :boolean },
|
431
|
-
:name => { :type => :keyword },
|
432
|
-
:reduce_whitespace => { :type => :boolean },
|
433
|
-
:run_ml_inference => { :type => :boolean }
|
434
|
-
}
|
435
|
-
},
|
255
|
+
:last_indexed_document_count => { :type => :integer },
|
256
|
+
:last_deleted_document_count => { :type => :integer },
|
436
257
|
:scheduling => {
|
437
258
|
:properties => {
|
438
259
|
:enabled => { :type => :boolean },
|
@@ -441,7 +262,9 @@ module Core
|
|
441
262
|
},
|
442
263
|
:service_type => { :type => :keyword },
|
443
264
|
:status => { :type => :keyword },
|
444
|
-
:
|
265
|
+
:sync_error => { :type => :text },
|
266
|
+
:sync_now => { :type => :boolean },
|
267
|
+
:sync_status => { :type => :keyword }
|
445
268
|
}
|
446
269
|
}
|
447
270
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -452,139 +275,55 @@ module Core
|
|
452
275
|
def ensure_job_index_exists
|
453
276
|
mappings = {
|
454
277
|
:properties => {
|
455
|
-
:
|
456
|
-
:
|
457
|
-
:completed_at => { :type => :date },
|
458
|
-
:connector => {
|
459
|
-
:properties => {
|
460
|
-
:configuration => { :type => :object },
|
461
|
-
:filtering => {
|
462
|
-
:properties => {
|
463
|
-
:domain => { :type => :keyword },
|
464
|
-
:rules => {
|
465
|
-
:properties => {
|
466
|
-
:id => { :type => :keyword },
|
467
|
-
:policy => { :type => :keyword },
|
468
|
-
:field => { :type => :keyword },
|
469
|
-
:rule => { :type => :keyword },
|
470
|
-
:value => { :type => :keyword },
|
471
|
-
:order => { :type => :short },
|
472
|
-
:created_at => { :type => :date },
|
473
|
-
:updated_at => { :type => :date }
|
474
|
-
}
|
475
|
-
},
|
476
|
-
:advanced_snippet => {
|
477
|
-
:properties => {
|
478
|
-
:value => { :type => :object },
|
479
|
-
:created_at => { :type => :date },
|
480
|
-
:updated_at => { :type => :date }
|
481
|
-
}
|
482
|
-
},
|
483
|
-
:warnings => {
|
484
|
-
:properties => {
|
485
|
-
:ids => { :type => :keyword },
|
486
|
-
:messages => { :type => :text }
|
487
|
-
}
|
488
|
-
}
|
489
|
-
}
|
490
|
-
},
|
491
|
-
:id => { :type => :keyword },
|
492
|
-
:index_name => { :type => :keyword },
|
493
|
-
:language => { :type => :keyword },
|
494
|
-
:pipeline => {
|
495
|
-
:properties => {
|
496
|
-
:extract_binary_content => { :type => :boolean },
|
497
|
-
:name => { :type => :keyword },
|
498
|
-
:reduce_whitespace => { :type => :boolean },
|
499
|
-
:run_ml_inference => { :type => :boolean }
|
500
|
-
}
|
501
|
-
},
|
502
|
-
:service_type => { :type => :keyword }
|
503
|
-
}
|
504
|
-
},
|
505
|
-
:created_at => { :type => :date },
|
506
|
-
:deleted_document_count => { :type => :integer },
|
278
|
+
:connector_id => { :type => :keyword },
|
279
|
+
:status => { :type => :keyword },
|
507
280
|
:error => { :type => :text },
|
281
|
+
:worker_hostname => { :type => :keyword },
|
508
282
|
:indexed_document_count => { :type => :integer },
|
509
|
-
:
|
510
|
-
:
|
511
|
-
:
|
512
|
-
:started_at => { :type => :date },
|
513
|
-
:status => { :type => :keyword },
|
514
|
-
:total_document_count => { :type => :integer },
|
515
|
-
:trigger_method => { :type => :keyword },
|
516
|
-
:worker_hostname => { :type => :keyword }
|
283
|
+
:deleted_document_count => { :type => :integer },
|
284
|
+
:created_at => { :type => :date },
|
285
|
+
:completed_at => { :type => :date }
|
517
286
|
}
|
518
287
|
}
|
519
288
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
520
289
|
end
|
521
290
|
|
522
291
|
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
523
|
-
update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
|
524
|
-
end
|
525
|
-
|
526
|
-
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
|
527
|
-
update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
|
528
|
-
end
|
529
|
-
|
530
|
-
def document_count(index_name)
|
531
|
-
client.indices.refresh(:index => index_name)
|
532
|
-
client.count(:index => index_name)['count']
|
533
|
-
end
|
534
|
-
|
535
|
-
private
|
536
|
-
|
537
|
-
def should_update_validations?(domain_validations, filtering)
|
538
|
-
domains_present = filtering.collect { |filter| filter[:domain] }
|
539
|
-
domains_to_update = domain_validations.keys
|
540
|
-
|
541
|
-
# non-empty intersection -> domains to update present
|
542
|
-
!(domains_present & domains_to_update).empty?
|
543
|
-
end
|
544
|
-
|
545
|
-
def client
|
546
|
-
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
547
|
-
end
|
548
|
-
|
549
|
-
def get_latest_index_in_alias(alias_name, indicies)
|
550
|
-
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
551
|
-
index_version = index_versions.max # gets the largest suffix number
|
552
|
-
"#{alias_name}-v#{index_version}"
|
553
|
-
end
|
554
|
-
|
555
|
-
def update_filter_validation(filter, domain_validations)
|
556
|
-
domain = filter[:domain]
|
557
|
-
|
558
|
-
if domain_validations.key?(domain)
|
559
|
-
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
560
|
-
filter.deep_merge!(new_validation_state)
|
561
|
-
end
|
562
|
-
end
|
563
|
-
|
564
|
-
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
|
565
292
|
return if doc.empty?
|
566
293
|
update_args = {
|
567
|
-
:index =>
|
568
|
-
:id =>
|
294
|
+
:index => Utility::Constants::CONNECTORS_INDEX,
|
295
|
+
:id => connector_id,
|
569
296
|
:body => { :doc => doc },
|
570
297
|
:refresh => true,
|
571
298
|
:retry_on_conflict => 3
|
572
299
|
}
|
573
|
-
|
300
|
+
# seq_no and primary_term are used for optimistic concurrency control
|
301
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
574
302
|
if seq_no && primary_term
|
575
303
|
update_args[:if_seq_no] = seq_no
|
576
304
|
update_args[:if_primary_term] = primary_term
|
577
305
|
update_args.delete(:retry_on_conflict)
|
578
306
|
end
|
579
|
-
|
580
307
|
begin
|
581
308
|
client.update(update_args)
|
582
309
|
rescue Elastic::Transport::Transport::Errors::Conflict
|
583
310
|
# VersionConflictException
|
584
311
|
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
585
|
-
raise ConnectorVersionChangedError.new(
|
312
|
+
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
586
313
|
end
|
587
314
|
end
|
315
|
+
|
316
|
+
private
|
317
|
+
|
318
|
+
def client
|
319
|
+
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
|
+
end
|
321
|
+
|
322
|
+
def get_latest_index_in_alias(alias_name, indicies)
|
323
|
+
index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
|
324
|
+
index_version = index_versions.max # gets the largest suffix number
|
325
|
+
"#{alias_name}-v#{index_version}"
|
326
|
+
end
|
588
327
|
end
|
589
328
|
end
|
590
329
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
|
10
|
+
module OutputSink
|
11
|
+
class BaseSink
|
12
|
+
def ingest(_document)
|
13
|
+
raise 'not implemented'
|
14
|
+
end
|
15
|
+
|
16
|
+
def ingest_multiple(_documents)
|
17
|
+
raise 'not implemented'
|
18
|
+
end
|
19
|
+
|
20
|
+
def delete(_id)
|
21
|
+
raise 'not implemented'
|
22
|
+
end
|
23
|
+
|
24
|
+
def delete_multiple(_ids)
|
25
|
+
raise 'not implemented'
|
26
|
+
end
|
27
|
+
|
28
|
+
def flush(_size: nil)
|
29
|
+
raise 'not implemented'
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink/base_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
|
13
|
+
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
+
def initialize(sinks = [])
|
15
|
+
@sinks = sinks
|
16
|
+
end
|
17
|
+
|
18
|
+
def ingest(document)
|
19
|
+
@sinks.each { |sink| sink.ingest(document) }
|
20
|
+
end
|
21
|
+
|
22
|
+
def flush(size: nil)
|
23
|
+
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
+
end
|
25
|
+
|
26
|
+
def ingest_multiple(documents)
|
27
|
+
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def delete(id)
|
31
|
+
@sinks.each { |sink| sink.delete(id) }
|
32
|
+
end
|
33
|
+
|
34
|
+
def delete_multiple(ids)
|
35
|
+
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
|
13
|
+
class ConsoleSink < Core::OutputSink::BaseSink
|
14
|
+
def ingest(document)
|
15
|
+
print_header 'Got a single document:'
|
16
|
+
puts document
|
17
|
+
end
|
18
|
+
|
19
|
+
def flush(size: nil)
|
20
|
+
print_header 'Flushing'
|
21
|
+
puts "Flush size: #{size}"
|
22
|
+
end
|
23
|
+
|
24
|
+
def ingest_multiple(documents)
|
25
|
+
print_header 'Got multiple documents:'
|
26
|
+
puts documents
|
27
|
+
end
|
28
|
+
|
29
|
+
def delete(id)
|
30
|
+
print_header "Deleting single id: #{id}"
|
31
|
+
puts id
|
32
|
+
end
|
33
|
+
|
34
|
+
def delete_multiple(ids)
|
35
|
+
print_header "Deleting several ids: #{ids}"
|
36
|
+
puts ids
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def print_delim
|
42
|
+
puts '----------------------------------------------------'
|
43
|
+
end
|
44
|
+
|
45
|
+
def print_header(header)
|
46
|
+
print_delim
|
47
|
+
puts header
|
48
|
+
print_delim
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/numeric/time'
|
10
|
+
require 'app/config'
|
11
|
+
require 'core/output_sink/base_sink'
|
12
|
+
require 'utility/es_client'
|
13
|
+
require 'utility/logger'
|
14
|
+
|
15
|
+
module Core::OutputSink
|
16
|
+
class EsSink < Core::OutputSink::BaseSink
|
17
|
+
def initialize(index_name, request_pipeline, flush_threshold = 50)
|
18
|
+
super()
|
19
|
+
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
20
|
+
@index_name = index_name
|
21
|
+
@request_pipeline = request_pipeline
|
22
|
+
@operation_queue = []
|
23
|
+
@flush_threshold = flush_threshold
|
24
|
+
end
|
25
|
+
|
26
|
+
def ingest(document)
|
27
|
+
return if document.blank?
|
28
|
+
|
29
|
+
@operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
|
30
|
+
flush if ready_to_flush?
|
31
|
+
end
|
32
|
+
|
33
|
+
def delete(doc_id)
|
34
|
+
return if doc_id.nil?
|
35
|
+
|
36
|
+
@operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
|
37
|
+
flush if ready_to_flush?
|
38
|
+
end
|
39
|
+
|
40
|
+
def flush(size: nil)
|
41
|
+
flush_size = size || @flush_threshold
|
42
|
+
|
43
|
+
while @operation_queue.any?
|
44
|
+
data_to_flush = @operation_queue.pop(flush_size)
|
45
|
+
send_data(data_to_flush)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def ingest_multiple(documents)
|
50
|
+
Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
|
51
|
+
documents.each { |doc| ingest(doc) }
|
52
|
+
end
|
53
|
+
|
54
|
+
def delete_multiple(ids)
|
55
|
+
Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
|
56
|
+
ids.each { |id| delete(id) }
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
attr_accessor :index_name
|
62
|
+
|
63
|
+
def send_data(ops)
|
64
|
+
return if ops.empty?
|
65
|
+
|
66
|
+
@client.bulk(:body => ops, :pipeline => @request_pipeline)
|
67
|
+
Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
|
68
|
+
end
|
69
|
+
|
70
|
+
def ready_to_flush?
|
71
|
+
@operation_queue.size >= @flush_threshold
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|