connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
data/lib/core/elastic_connector_actions.rb +59 -320
@@ -19,12 +19,6 @@ module Core
19
19
  end
20
20
  end
21
21
 
22
- class JobNotCreatedError < StandardError
23
- def initialize(connector_id, response)
24
- super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
25
- end
26
- end
27
-
28
22
  class ConnectorVersionChangedError < StandardError
29
23
  def initialize(connector_id, seq_no, primary_term)
30
24
  super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
@@ -49,17 +43,10 @@ module Core
49
43
  end
50
44
 
51
45
  def get_connector(connector_id)
52
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
53
46
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
54
47
  end
55
48
 
56
- def get_job(job_id)
57
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
58
- client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
59
- end
60
-
61
49
  def connectors_meta
62
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
63
50
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
64
51
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
65
52
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -78,19 +65,6 @@ module Core
78
65
  )
79
66
  end
80
67
 
81
- def search_jobs(query, page_size, offset)
82
- client.search(
83
- :index => Utility::Constants::JOB_INDEX,
84
- :ignore => 404,
85
- :body => {
86
- :size => page_size,
87
- :from => offset,
88
- :query => query,
89
- :sort => ['created_at']
90
- }
91
- )
92
- end
93
-
94
68
  def update_connector_configuration(connector_id, configuration)
95
69
  update_connector_fields(connector_id, :configuration => configuration)
96
70
  end
@@ -110,57 +84,11 @@ module Core
110
84
  update_connector_configuration(connector_id, payload)
111
85
  end
112
86
 
113
- def update_filtering_validation(connector_id, filter_validation_results)
114
- return if filter_validation_results.empty?
115
-
116
- filtering = get_connector(connector_id).dig(:_source, :filtering)
117
-
118
- case filtering
119
- when Hash
120
- update_filter_validation(filtering, filter_validation_results)
121
- when Array
122
- return unless should_update_validations?(filter_validation_results, filtering)
123
-
124
- filtering.each do |filter|
125
- update_filter_validation(filter, filter_validation_results)
126
- end
127
- else
128
- Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
129
- return
130
- end
131
-
132
- update_connector_fields(connector_id, { :filtering => filtering })
133
- end
134
-
135
- def update_connector_sync_now(connector_id, sync_now)
136
- doc = connector_with_concurrency_control(connector_id)
137
-
138
- body = { sync_now: sync_now, last_synced: Time.now }
139
-
140
- update_connector_fields(
141
- connector_id,
142
- body,
143
- doc[:seq_no],
144
- doc[:primary_term]
145
- )
146
- end
147
-
148
- def update_connector_last_sync_status(connector_id, last_sync_status)
149
- doc = connector_with_concurrency_control(connector_id)
150
-
151
- update_connector_fields(
152
- connector_id,
153
- { last_sync_status: last_sync_status },
154
- doc[:seq_no],
155
- doc[:primary_term]
156
- )
157
- end
158
-
159
- def connector_with_concurrency_control(connector_id)
87
+ def claim_job(connector_id)
160
88
  seq_no = nil
161
89
  primary_term = nil
162
-
163
- doc = client.get(
90
+ sync_in_progress = false
91
+ connector_record = client.get(
164
92
  :index => Utility::Constants::CONNECTORS_INDEX,
165
93
  :id => connector_id,
166
94
  :ignore => 404,
@@ -168,42 +96,39 @@ module Core
168
96
  ).tap do |response|
169
97
  seq_no = response['_seq_no']
170
98
  primary_term = response['_primary_term']
99
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
171
100
  end
101
+ if sync_in_progress
102
+ raise JobAlreadyRunningError.new(connector_id)
103
+ end
104
+ update_connector_fields(
105
+ connector_id,
106
+ { :sync_now => false,
107
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
108
+ :last_synced => Time.now },
109
+ seq_no,
110
+ primary_term
111
+ )
172
112
 
173
- { doc: doc, seq_no: seq_no, primary_term: primary_term }
174
- end
175
-
176
- def create_job(connector_settings:)
177
113
  body = {
178
- status: Connectors::SyncStatus::PENDING,
179
- created_at: Time.now,
180
- last_seen: Time.now,
181
- connector: {
182
- id: connector_settings.id,
183
- filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
- index_name: connector_settings.index_name,
185
- language: connector_settings[:language],
186
- pipeline: connector_settings[:pipeline],
187
- service_type: connector_settings.service_type
188
- }
114
+ :connector_id => connector_id,
115
+ :status => Connectors::SyncStatus::IN_PROGRESS,
116
+ :worker_hostname => Socket.gethostname,
117
+ :created_at => Time.now,
118
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
189
119
  }
190
120
 
191
- index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
192
-
193
- return index_response if index_response['result'] == 'created'
194
-
195
- raise JobNotCreatedError.new(connector_settings.id, index_response)
121
+ client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
196
122
  end
197
123
 
198
124
  def convert_connector_filtering_to_job_filtering(connector_filtering)
199
125
  return [] unless connector_filtering
200
126
  connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
201
127
  connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
202
- snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
203
128
  job_filtering << {
204
129
  'domain' => filtering_domain['domain'],
205
130
  'rules' => filtering_domain.dig('active', 'rules'),
206
- 'advanced_snippet' => snippet['value'] || snippet,
131
+ 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
207
132
  'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
208
133
  }
209
134
  end
@@ -220,33 +145,22 @@ module Core
220
145
  update_connector_fields(connector_id, body)
221
146
  end
222
147
 
223
- def update_sync(job_id, metadata)
224
- body = {
225
- :doc => { :last_seen => Time.now }.merge(metadata)
226
- }
227
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
228
- end
229
-
230
- def complete_sync(connector_id, job_id, metadata, error)
231
- sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
232
-
233
- metadata ||= {}
148
+ def complete_sync(connector_id, job_id, status)
149
+ sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
234
150
 
235
151
  update_connector_fields(connector_id,
236
152
  :last_sync_status => sync_status,
237
- :last_sync_error => error,
238
- :error => error,
153
+ :last_sync_error => status[:error],
154
+ :error => status[:error],
239
155
  :last_synced => Time.now,
240
- :last_indexed_document_count => metadata[:indexed_document_count],
241
- :last_deleted_document_count => metadata[:deleted_document_count])
156
+ :last_indexed_document_count => status[:indexed_document_count],
157
+ :last_deleted_document_count => status[:deleted_document_count])
242
158
 
243
159
  body = {
244
160
  :doc => {
245
161
  :status => sync_status,
246
- :completed_at => Time.now,
247
- :last_seen => Time.now,
248
- :error => error
249
- }.merge(metadata)
162
+ :completed_at => Time.now
163
+ }.merge(status)
250
164
  }
251
165
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
252
166
  end
@@ -334,105 +248,12 @@ module Core
334
248
  :properties => {
335
249
  :api_key_id => { :type => :keyword },
336
250
  :configuration => { :type => :object },
337
- :description => { :type => :text },
338
- :error => { :type => :keyword },
339
- :features => {
340
- :properties => {
341
- :filtering_advanced_config => { :type => :boolean },
342
- :filtering_rules => { :type => :boolean }
343
- }
344
- },
345
- :filtering => {
346
- :properties => {
347
- :domain => { :type => :keyword },
348
- :active => {
349
- :properties => {
350
- :rules => {
351
- :properties => {
352
- :id => { :type => :keyword },
353
- :policy => { :type => :keyword },
354
- :field => { :type => :keyword },
355
- :rule => { :type => :keyword },
356
- :value => { :type => :keyword },
357
- :order => { :type => :short },
358
- :created_at => { :type => :date },
359
- :updated_at => { :type => :date }
360
- }
361
- },
362
- :advanced_snippet => {
363
- :properties => {
364
- :value => { :type => :object },
365
- :created_at => { :type => :date },
366
- :updated_at => { :type => :date }
367
- }
368
- },
369
- :validation => {
370
- :properties => {
371
- :state => { :type => :keyword },
372
- :errors => {
373
- :properties => {
374
- :ids => { :type => :keyword },
375
- :messages => { :type => :text }
376
- }
377
- }
378
- }
379
- }
380
- }
381
- },
382
- :draft => {
383
- :properties => {
384
- :rules => {
385
- :properties => {
386
- :id => { :type => :keyword },
387
- :policy => { :type => :keyword },
388
- :field => { :type => :keyword },
389
- :rule => { :type => :keyword },
390
- :value => { :type => :keyword },
391
- :order => { :type => :short },
392
- :created_at => { :type => :date },
393
- :updated_at => { :type => :date }
394
- }
395
- },
396
- :advanced_snippet => {
397
- :properties => {
398
- :value => { :type => :object },
399
- :created_at => { :type => :date },
400
- :updated_at => { :type => :date }
401
- }
402
- },
403
- :validation => {
404
- :properties => {
405
- :state => { :type => :keyword },
406
- :errors => {
407
- :properties => {
408
- :ids => { :type => :keyword },
409
- :messages => { :type => :text }
410
- }
411
- }
412
- }
413
- }
414
- }
415
- }
416
- }
417
- },
251
+ :error => { :type => :text },
418
252
  :index_name => { :type => :keyword },
419
- :is_native => { :type => :boolean },
420
- :language => { :type => :keyword },
421
253
  :last_seen => { :type => :date },
422
- :last_sync_error => { :type => :keyword },
423
- :last_sync_status => { :type => :keyword },
424
254
  :last_synced => { :type => :date },
425
- :last_deleted_document_count => { :type => :long },
426
- :last_indexed_document_count => { :type => :long },
427
- :name => { :type => :keyword },
428
- :pipeline => {
429
- :properties => {
430
- :extract_binary_content => { :type => :boolean },
431
- :name => { :type => :keyword },
432
- :reduce_whitespace => { :type => :boolean },
433
- :run_ml_inference => { :type => :boolean }
434
- }
435
- },
255
+ :last_indexed_document_count => { :type => :integer },
256
+ :last_deleted_document_count => { :type => :integer },
436
257
  :scheduling => {
437
258
  :properties => {
438
259
  :enabled => { :type => :boolean },
@@ -441,7 +262,9 @@ module Core
441
262
  },
442
263
  :service_type => { :type => :keyword },
443
264
  :status => { :type => :keyword },
444
- :sync_now => { :type => :boolean }
265
+ :sync_error => { :type => :text },
266
+ :sync_now => { :type => :boolean },
267
+ :sync_status => { :type => :keyword }
445
268
  }
446
269
  }
447
270
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -452,139 +275,55 @@ module Core
452
275
  def ensure_job_index_exists
453
276
  mappings = {
454
277
  :properties => {
455
- :cancelation_requested_at => { :type => :date },
456
- :canceled_at => { :type => :date },
457
- :completed_at => { :type => :date },
458
- :connector => {
459
- :properties => {
460
- :configuration => { :type => :object },
461
- :filtering => {
462
- :properties => {
463
- :domain => { :type => :keyword },
464
- :rules => {
465
- :properties => {
466
- :id => { :type => :keyword },
467
- :policy => { :type => :keyword },
468
- :field => { :type => :keyword },
469
- :rule => { :type => :keyword },
470
- :value => { :type => :keyword },
471
- :order => { :type => :short },
472
- :created_at => { :type => :date },
473
- :updated_at => { :type => :date }
474
- }
475
- },
476
- :advanced_snippet => {
477
- :properties => {
478
- :value => { :type => :object },
479
- :created_at => { :type => :date },
480
- :updated_at => { :type => :date }
481
- }
482
- },
483
- :warnings => {
484
- :properties => {
485
- :ids => { :type => :keyword },
486
- :messages => { :type => :text }
487
- }
488
- }
489
- }
490
- },
491
- :id => { :type => :keyword },
492
- :index_name => { :type => :keyword },
493
- :language => { :type => :keyword },
494
- :pipeline => {
495
- :properties => {
496
- :extract_binary_content => { :type => :boolean },
497
- :name => { :type => :keyword },
498
- :reduce_whitespace => { :type => :boolean },
499
- :run_ml_inference => { :type => :boolean }
500
- }
501
- },
502
- :service_type => { :type => :keyword }
503
- }
504
- },
505
- :created_at => { :type => :date },
506
- :deleted_document_count => { :type => :integer },
278
+ :connector_id => { :type => :keyword },
279
+ :status => { :type => :keyword },
507
280
  :error => { :type => :text },
281
+ :worker_hostname => { :type => :keyword },
508
282
  :indexed_document_count => { :type => :integer },
509
- :indexed_document_volume => { :type => :integer },
510
- :last_seen => { :type => :date },
511
- :metadata => { :type => :object },
512
- :started_at => { :type => :date },
513
- :status => { :type => :keyword },
514
- :total_document_count => { :type => :integer },
515
- :trigger_method => { :type => :keyword },
516
- :worker_hostname => { :type => :keyword }
283
+ :deleted_document_count => { :type => :integer },
284
+ :created_at => { :type => :date },
285
+ :completed_at => { :type => :date }
517
286
  }
518
287
  }
519
288
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
520
289
  end
521
290
 
522
291
  def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
523
- update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
524
- end
525
-
526
- def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
527
- update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
528
- end
529
-
530
- def document_count(index_name)
531
- client.indices.refresh(:index => index_name)
532
- client.count(:index => index_name)['count']
533
- end
534
-
535
- private
536
-
537
- def should_update_validations?(domain_validations, filtering)
538
- domains_present = filtering.collect { |filter| filter[:domain] }
539
- domains_to_update = domain_validations.keys
540
-
541
- # non-empty intersection -> domains to update present
542
- !(domains_present & domains_to_update).empty?
543
- end
544
-
545
- def client
546
- @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
547
- end
548
-
549
- def get_latest_index_in_alias(alias_name, indicies)
550
- index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
551
- index_version = index_versions.max # gets the largest suffix number
552
- "#{alias_name}-v#{index_version}"
553
- end
554
-
555
- def update_filter_validation(filter, domain_validations)
556
- domain = filter[:domain]
557
-
558
- if domain_validations.key?(domain)
559
- new_validation_state = { :draft => { :validation => domain_validations[domain] } }
560
- filter.deep_merge!(new_validation_state)
561
- end
562
- end
563
-
564
- def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
565
292
  return if doc.empty?
566
293
  update_args = {
567
- :index => index,
568
- :id => id,
294
+ :index => Utility::Constants::CONNECTORS_INDEX,
295
+ :id => connector_id,
569
296
  :body => { :doc => doc },
570
297
  :refresh => true,
571
298
  :retry_on_conflict => 3
572
299
  }
573
-
300
+ # seq_no and primary_term are used for optimistic concurrency control
301
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
574
302
  if seq_no && primary_term
575
303
  update_args[:if_seq_no] = seq_no
576
304
  update_args[:if_primary_term] = primary_term
577
305
  update_args.delete(:retry_on_conflict)
578
306
  end
579
-
580
307
  begin
581
308
  client.update(update_args)
582
309
  rescue Elastic::Transport::Transport::Errors::Conflict
583
310
  # VersionConflictException
584
311
  # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
585
- raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
312
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
586
313
  end
587
314
  end
315
+
316
+ private
317
+
318
+ def client
319
+ @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
320
+ end
321
+
322
+ def get_latest_index_in_alias(alias_name, indicies)
323
+ index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
324
+ index_version = index_versions.max # gets the largest suffix number
325
+ "#{alias_name}-v#{index_version}"
326
+ end
588
327
  end
589
328
  end
590
329
  end
data/lib/core/output_sink/base_sink.rb +33 -0
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Core
10
+ module OutputSink
11
+ class BaseSink
12
+ def ingest(_document)
13
+ raise 'not implemented'
14
+ end
15
+
16
+ def ingest_multiple(_documents)
17
+ raise 'not implemented'
18
+ end
19
+
20
+ def delete(_id)
21
+ raise 'not implemented'
22
+ end
23
+
24
+ def delete_multiple(_ids)
25
+ raise 'not implemented'
26
+ end
27
+
28
+ def flush(_size: nil)
29
+ raise 'not implemented'
30
+ end
31
+ end
32
+ end
33
+ end
data/lib/core/output_sink/combined_sink.rb +38 -0
@@ -0,0 +1,38 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/base_sink'
10
+ require 'utility/logger'
11
+
12
+ module Core::OutputSink
13
+ class CombinedSink < Core::OutputSink::BaseSink
14
+ def initialize(sinks = [])
15
+ @sinks = sinks
16
+ end
17
+
18
+ def ingest(document)
19
+ @sinks.each { |sink| sink.ingest(document) }
20
+ end
21
+
22
+ def flush(size: nil)
23
+ @sinks.each { |sink| sink.flush(size: size) }
24
+ end
25
+
26
+ def ingest_multiple(documents)
27
+ @sinks.each { |sink| sink.ingest_multiple(documents) }
28
+ end
29
+
30
+ def delete(id)
31
+ @sinks.each { |sink| sink.delete(id) }
32
+ end
33
+
34
+ def delete_multiple(ids)
35
+ @sinks.each { |sink| sink.delete_multiple(ids) }
36
+ end
37
+ end
38
+ end
data/lib/core/output_sink/console_sink.rb +51 -0
@@ -0,0 +1,51 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink'
10
+ require 'utility/logger'
11
+
12
+ module Core::OutputSink
13
+ class ConsoleSink < Core::OutputSink::BaseSink
14
+ def ingest(document)
15
+ print_header 'Got a single document:'
16
+ puts document
17
+ end
18
+
19
+ def flush(size: nil)
20
+ print_header 'Flushing'
21
+ puts "Flush size: #{size}"
22
+ end
23
+
24
+ def ingest_multiple(documents)
25
+ print_header 'Got multiple documents:'
26
+ puts documents
27
+ end
28
+
29
+ def delete(id)
30
+ print_header "Deleting single id: #{id}"
31
+ puts id
32
+ end
33
+
34
+ def delete_multiple(ids)
35
+ print_header "Deleting several ids: #{ids}"
36
+ puts ids
37
+ end
38
+
39
+ private
40
+
41
+ def print_delim
42
+ puts '----------------------------------------------------'
43
+ end
44
+
45
+ def print_header(header)
46
+ print_delim
47
+ puts header
48
+ print_delim
49
+ end
50
+ end
51
+ end
data/lib/core/output_sink/es_sink.rb +74 -0
@@ -0,0 +1,74 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/numeric/time'
10
+ require 'app/config'
11
+ require 'core/output_sink/base_sink'
12
+ require 'utility/es_client'
13
+ require 'utility/logger'
14
+
15
+ module Core::OutputSink
16
+ class EsSink < Core::OutputSink::BaseSink
17
+ def initialize(index_name, request_pipeline, flush_threshold = 50)
18
+ super()
19
+ @client = Utility::EsClient.new(App::Config[:elasticsearch])
20
+ @index_name = index_name
21
+ @request_pipeline = request_pipeline
22
+ @operation_queue = []
23
+ @flush_threshold = flush_threshold
24
+ end
25
+
26
+ def ingest(document)
27
+ return if document.blank?
28
+
29
+ @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
30
+ flush if ready_to_flush?
31
+ end
32
+
33
+ def delete(doc_id)
34
+ return if doc_id.nil?
35
+
36
+ @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
37
+ flush if ready_to_flush?
38
+ end
39
+
40
+ def flush(size: nil)
41
+ flush_size = size || @flush_threshold
42
+
43
+ while @operation_queue.any?
44
+ data_to_flush = @operation_queue.pop(flush_size)
45
+ send_data(data_to_flush)
46
+ end
47
+ end
48
+
49
+ def ingest_multiple(documents)
50
+ Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
51
+ documents.each { |doc| ingest(doc) }
52
+ end
53
+
54
+ def delete_multiple(ids)
55
+ Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
56
+ ids.each { |id| delete(id) }
57
+ end
58
+
59
+ private
60
+
61
+ attr_accessor :index_name
62
+
63
+ def send_data(ops)
64
+ return if ops.empty?
65
+
66
+ @client.bulk(:body => ops, :pipeline => @request_pipeline)
67
+ Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
68
+ end
69
+
70
+ def ready_to_flush?
71
+ @operation_queue.size >= @flush_threshold
72
+ end
73
+ end
74
+ end