connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
@@ -19,12 +19,6 @@ module Core
19
19
  end
20
20
  end
21
21
 
22
- class JobNotCreatedError < StandardError
23
- def initialize(connector_id, response)
24
- super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
25
- end
26
- end
27
-
28
22
  class ConnectorVersionChangedError < StandardError
29
23
  def initialize(connector_id, seq_no, primary_term)
30
24
  super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
@@ -49,17 +43,10 @@ module Core
49
43
  end
50
44
 
51
45
  def get_connector(connector_id)
52
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
53
46
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
54
47
  end
55
48
 
56
- def get_job(job_id)
57
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
58
- client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
59
- end
60
-
61
49
  def connectors_meta
62
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
63
50
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
64
51
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
65
52
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -78,19 +65,6 @@ module Core
78
65
  )
79
66
  end
80
67
 
81
- def search_jobs(query, page_size, offset)
82
- client.search(
83
- :index => Utility::Constants::JOB_INDEX,
84
- :ignore => 404,
85
- :body => {
86
- :size => page_size,
87
- :from => offset,
88
- :query => query,
89
- :sort => ['created_at']
90
- }
91
- )
92
- end
93
-
94
68
  def update_connector_configuration(connector_id, configuration)
95
69
  update_connector_fields(connector_id, :configuration => configuration)
96
70
  end
@@ -110,57 +84,11 @@ module Core
110
84
  update_connector_configuration(connector_id, payload)
111
85
  end
112
86
 
113
- def update_filtering_validation(connector_id, filter_validation_results)
114
- return if filter_validation_results.empty?
115
-
116
- filtering = get_connector(connector_id).dig(:_source, :filtering)
117
-
118
- case filtering
119
- when Hash
120
- update_filter_validation(filtering, filter_validation_results)
121
- when Array
122
- return unless should_update_validations?(filter_validation_results, filtering)
123
-
124
- filtering.each do |filter|
125
- update_filter_validation(filter, filter_validation_results)
126
- end
127
- else
128
- Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
129
- return
130
- end
131
-
132
- update_connector_fields(connector_id, { :filtering => filtering })
133
- end
134
-
135
- def update_connector_sync_now(connector_id, sync_now)
136
- doc = connector_with_concurrency_control(connector_id)
137
-
138
- body = { sync_now: sync_now, last_synced: Time.now }
139
-
140
- update_connector_fields(
141
- connector_id,
142
- body,
143
- doc[:seq_no],
144
- doc[:primary_term]
145
- )
146
- end
147
-
148
- def update_connector_last_sync_status(connector_id, last_sync_status)
149
- doc = connector_with_concurrency_control(connector_id)
150
-
151
- update_connector_fields(
152
- connector_id,
153
- { last_sync_status: last_sync_status },
154
- doc[:seq_no],
155
- doc[:primary_term]
156
- )
157
- end
158
-
159
- def connector_with_concurrency_control(connector_id)
87
+ def claim_job(connector_id)
160
88
  seq_no = nil
161
89
  primary_term = nil
162
-
163
- doc = client.get(
90
+ sync_in_progress = false
91
+ connector_record = client.get(
164
92
  :index => Utility::Constants::CONNECTORS_INDEX,
165
93
  :id => connector_id,
166
94
  :ignore => 404,
@@ -168,42 +96,39 @@ module Core
168
96
  ).tap do |response|
169
97
  seq_no = response['_seq_no']
170
98
  primary_term = response['_primary_term']
99
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
171
100
  end
101
+ if sync_in_progress
102
+ raise JobAlreadyRunningError.new(connector_id)
103
+ end
104
+ update_connector_fields(
105
+ connector_id,
106
+ { :sync_now => false,
107
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
108
+ :last_synced => Time.now },
109
+ seq_no,
110
+ primary_term
111
+ )
172
112
 
173
- { doc: doc, seq_no: seq_no, primary_term: primary_term }
174
- end
175
-
176
- def create_job(connector_settings:)
177
113
  body = {
178
- status: Connectors::SyncStatus::PENDING,
179
- created_at: Time.now,
180
- last_seen: Time.now,
181
- connector: {
182
- id: connector_settings.id,
183
- filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
- index_name: connector_settings.index_name,
185
- language: connector_settings[:language],
186
- pipeline: connector_settings[:pipeline],
187
- service_type: connector_settings.service_type
188
- }
114
+ :connector_id => connector_id,
115
+ :status => Connectors::SyncStatus::IN_PROGRESS,
116
+ :worker_hostname => Socket.gethostname,
117
+ :created_at => Time.now,
118
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
189
119
  }
190
120
 
191
- index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
192
-
193
- return index_response if index_response['result'] == 'created'
194
-
195
- raise JobNotCreatedError.new(connector_settings.id, index_response)
121
+ client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
196
122
  end
197
123
 
198
124
  def convert_connector_filtering_to_job_filtering(connector_filtering)
199
125
  return [] unless connector_filtering
200
126
  connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
201
127
  connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
202
- snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
203
128
  job_filtering << {
204
129
  'domain' => filtering_domain['domain'],
205
130
  'rules' => filtering_domain.dig('active', 'rules'),
206
- 'advanced_snippet' => snippet['value'] || snippet,
131
+ 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
207
132
  'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
208
133
  }
209
134
  end
@@ -220,33 +145,22 @@ module Core
220
145
  update_connector_fields(connector_id, body)
221
146
  end
222
147
 
223
- def update_sync(job_id, metadata)
224
- body = {
225
- :doc => { :last_seen => Time.now }.merge(metadata)
226
- }
227
- client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
228
- end
229
-
230
- def complete_sync(connector_id, job_id, metadata, error)
231
- sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
232
-
233
- metadata ||= {}
148
+ def complete_sync(connector_id, job_id, status)
149
+ sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
234
150
 
235
151
  update_connector_fields(connector_id,
236
152
  :last_sync_status => sync_status,
237
- :last_sync_error => error,
238
- :error => error,
153
+ :last_sync_error => status[:error],
154
+ :error => status[:error],
239
155
  :last_synced => Time.now,
240
- :last_indexed_document_count => metadata[:indexed_document_count],
241
- :last_deleted_document_count => metadata[:deleted_document_count])
156
+ :last_indexed_document_count => status[:indexed_document_count],
157
+ :last_deleted_document_count => status[:deleted_document_count])
242
158
 
243
159
  body = {
244
160
  :doc => {
245
161
  :status => sync_status,
246
- :completed_at => Time.now,
247
- :last_seen => Time.now,
248
- :error => error
249
- }.merge(metadata)
162
+ :completed_at => Time.now
163
+ }.merge(status)
250
164
  }
251
165
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
252
166
  end
@@ -334,105 +248,12 @@ module Core
334
248
  :properties => {
335
249
  :api_key_id => { :type => :keyword },
336
250
  :configuration => { :type => :object },
337
- :description => { :type => :text },
338
- :error => { :type => :keyword },
339
- :features => {
340
- :properties => {
341
- :filtering_advanced_config => { :type => :boolean },
342
- :filtering_rules => { :type => :boolean }
343
- }
344
- },
345
- :filtering => {
346
- :properties => {
347
- :domain => { :type => :keyword },
348
- :active => {
349
- :properties => {
350
- :rules => {
351
- :properties => {
352
- :id => { :type => :keyword },
353
- :policy => { :type => :keyword },
354
- :field => { :type => :keyword },
355
- :rule => { :type => :keyword },
356
- :value => { :type => :keyword },
357
- :order => { :type => :short },
358
- :created_at => { :type => :date },
359
- :updated_at => { :type => :date }
360
- }
361
- },
362
- :advanced_snippet => {
363
- :properties => {
364
- :value => { :type => :object },
365
- :created_at => { :type => :date },
366
- :updated_at => { :type => :date }
367
- }
368
- },
369
- :validation => {
370
- :properties => {
371
- :state => { :type => :keyword },
372
- :errors => {
373
- :properties => {
374
- :ids => { :type => :keyword },
375
- :messages => { :type => :text }
376
- }
377
- }
378
- }
379
- }
380
- }
381
- },
382
- :draft => {
383
- :properties => {
384
- :rules => {
385
- :properties => {
386
- :id => { :type => :keyword },
387
- :policy => { :type => :keyword },
388
- :field => { :type => :keyword },
389
- :rule => { :type => :keyword },
390
- :value => { :type => :keyword },
391
- :order => { :type => :short },
392
- :created_at => { :type => :date },
393
- :updated_at => { :type => :date }
394
- }
395
- },
396
- :advanced_snippet => {
397
- :properties => {
398
- :value => { :type => :object },
399
- :created_at => { :type => :date },
400
- :updated_at => { :type => :date }
401
- }
402
- },
403
- :validation => {
404
- :properties => {
405
- :state => { :type => :keyword },
406
- :errors => {
407
- :properties => {
408
- :ids => { :type => :keyword },
409
- :messages => { :type => :text }
410
- }
411
- }
412
- }
413
- }
414
- }
415
- }
416
- }
417
- },
251
+ :error => { :type => :text },
418
252
  :index_name => { :type => :keyword },
419
- :is_native => { :type => :boolean },
420
- :language => { :type => :keyword },
421
253
  :last_seen => { :type => :date },
422
- :last_sync_error => { :type => :keyword },
423
- :last_sync_status => { :type => :keyword },
424
254
  :last_synced => { :type => :date },
425
- :last_deleted_document_count => { :type => :long },
426
- :last_indexed_document_count => { :type => :long },
427
- :name => { :type => :keyword },
428
- :pipeline => {
429
- :properties => {
430
- :extract_binary_content => { :type => :boolean },
431
- :name => { :type => :keyword },
432
- :reduce_whitespace => { :type => :boolean },
433
- :run_ml_inference => { :type => :boolean }
434
- }
435
- },
255
+ :last_indexed_document_count => { :type => :integer },
256
+ :last_deleted_document_count => { :type => :integer },
436
257
  :scheduling => {
437
258
  :properties => {
438
259
  :enabled => { :type => :boolean },
@@ -441,7 +262,9 @@ module Core
441
262
  },
442
263
  :service_type => { :type => :keyword },
443
264
  :status => { :type => :keyword },
444
- :sync_now => { :type => :boolean }
265
+ :sync_error => { :type => :text },
266
+ :sync_now => { :type => :boolean },
267
+ :sync_status => { :type => :keyword }
445
268
  }
446
269
  }
447
270
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -452,139 +275,55 @@ module Core
452
275
  def ensure_job_index_exists
453
276
  mappings = {
454
277
  :properties => {
455
- :cancelation_requested_at => { :type => :date },
456
- :canceled_at => { :type => :date },
457
- :completed_at => { :type => :date },
458
- :connector => {
459
- :properties => {
460
- :configuration => { :type => :object },
461
- :filtering => {
462
- :properties => {
463
- :domain => { :type => :keyword },
464
- :rules => {
465
- :properties => {
466
- :id => { :type => :keyword },
467
- :policy => { :type => :keyword },
468
- :field => { :type => :keyword },
469
- :rule => { :type => :keyword },
470
- :value => { :type => :keyword },
471
- :order => { :type => :short },
472
- :created_at => { :type => :date },
473
- :updated_at => { :type => :date }
474
- }
475
- },
476
- :advanced_snippet => {
477
- :properties => {
478
- :value => { :type => :object },
479
- :created_at => { :type => :date },
480
- :updated_at => { :type => :date }
481
- }
482
- },
483
- :warnings => {
484
- :properties => {
485
- :ids => { :type => :keyword },
486
- :messages => { :type => :text }
487
- }
488
- }
489
- }
490
- },
491
- :id => { :type => :keyword },
492
- :index_name => { :type => :keyword },
493
- :language => { :type => :keyword },
494
- :pipeline => {
495
- :properties => {
496
- :extract_binary_content => { :type => :boolean },
497
- :name => { :type => :keyword },
498
- :reduce_whitespace => { :type => :boolean },
499
- :run_ml_inference => { :type => :boolean }
500
- }
501
- },
502
- :service_type => { :type => :keyword }
503
- }
504
- },
505
- :created_at => { :type => :date },
506
- :deleted_document_count => { :type => :integer },
278
+ :connector_id => { :type => :keyword },
279
+ :status => { :type => :keyword },
507
280
  :error => { :type => :text },
281
+ :worker_hostname => { :type => :keyword },
508
282
  :indexed_document_count => { :type => :integer },
509
- :indexed_document_volume => { :type => :integer },
510
- :last_seen => { :type => :date },
511
- :metadata => { :type => :object },
512
- :started_at => { :type => :date },
513
- :status => { :type => :keyword },
514
- :total_document_count => { :type => :integer },
515
- :trigger_method => { :type => :keyword },
516
- :worker_hostname => { :type => :keyword }
283
+ :deleted_document_count => { :type => :integer },
284
+ :created_at => { :type => :date },
285
+ :completed_at => { :type => :date }
517
286
  }
518
287
  }
519
288
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
520
289
  end
521
290
 
522
291
  def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
523
- update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
524
- end
525
-
526
- def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
527
- update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
528
- end
529
-
530
- def document_count(index_name)
531
- client.indices.refresh(:index => index_name)
532
- client.count(:index => index_name)['count']
533
- end
534
-
535
- private
536
-
537
- def should_update_validations?(domain_validations, filtering)
538
- domains_present = filtering.collect { |filter| filter[:domain] }
539
- domains_to_update = domain_validations.keys
540
-
541
- # non-empty intersection -> domains to update present
542
- !(domains_present & domains_to_update).empty?
543
- end
544
-
545
- def client
546
- @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
547
- end
548
-
549
- def get_latest_index_in_alias(alias_name, indicies)
550
- index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
551
- index_version = index_versions.max # gets the largest suffix number
552
- "#{alias_name}-v#{index_version}"
553
- end
554
-
555
- def update_filter_validation(filter, domain_validations)
556
- domain = filter[:domain]
557
-
558
- if domain_validations.key?(domain)
559
- new_validation_state = { :draft => { :validation => domain_validations[domain] } }
560
- filter.deep_merge!(new_validation_state)
561
- end
562
- end
563
-
564
- def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
565
292
  return if doc.empty?
566
293
  update_args = {
567
- :index => index,
568
- :id => id,
294
+ :index => Utility::Constants::CONNECTORS_INDEX,
295
+ :id => connector_id,
569
296
  :body => { :doc => doc },
570
297
  :refresh => true,
571
298
  :retry_on_conflict => 3
572
299
  }
573
-
300
+ # seq_no and primary_term are used for optimistic concurrency control
301
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
574
302
  if seq_no && primary_term
575
303
  update_args[:if_seq_no] = seq_no
576
304
  update_args[:if_primary_term] = primary_term
577
305
  update_args.delete(:retry_on_conflict)
578
306
  end
579
-
580
307
  begin
581
308
  client.update(update_args)
582
309
  rescue Elastic::Transport::Transport::Errors::Conflict
583
310
  # VersionConflictException
584
311
  # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
585
- raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
312
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
586
313
  end
587
314
  end
315
+
316
+ private
317
+
318
+ def client
319
+ @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
320
+ end
321
+
322
+ def get_latest_index_in_alias(alias_name, indicies)
323
+ index_versions = indicies.map { |index| index.gsub("#{alias_name}-v", '').to_i }
324
+ index_version = index_versions.max # gets the largest suffix number
325
+ "#{alias_name}-v#{index_version}"
326
+ end
588
327
  end
589
328
  end
590
329
  end
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Core
  module OutputSink
    # Abstract sink interface. Every operation raises a RuntimeError with the
    # message 'not implemented' until a subclass overrides it.
    class BaseSink
      # Hands a single document to the sink.
      #
      # @param _document [Object] the document to output (unused here)
      # @raise [RuntimeError] always, in this base class
      def ingest(_document)
        raise 'not implemented'
      end

      # Hands a collection of documents to the sink.
      #
      # @param _documents [Object] the documents to output (unused here)
      # @raise [RuntimeError] always, in this base class
      def ingest_multiple(_documents)
        raise 'not implemented'
      end

      # Requests deletion of a single document id.
      #
      # @param _id [Object] the id to delete (unused here)
      # @raise [RuntimeError] always, in this base class
      def delete(_id)
        raise 'not implemented'
      end

      # Requests deletion of several document ids.
      #
      # @param _ids [Object] the ids to delete (unused here)
      # @raise [RuntimeError] always, in this base class
      def delete_multiple(_ids)
        raise 'not implemented'
      end

      # Flushes whatever the sink has buffered.
      #
      # Accepts `size:` in addition to the historical `_size:` keyword, so that
      # a caller passing `size:` to a sink that did not override this method
      # gets the 'not implemented' error rather than an unknown-keyword
      # ArgumentError.
      #
      # @param size [Object, nil] requested flush size (unused here)
      # @param _size [Object, nil] legacy spelling of the same option (unused here)
      # @raise [RuntimeError] always, in this base class
      def flush(size: nil, _size: nil)
        raise 'not implemented'
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/base_sink'
10
+ require 'utility/logger'
11
+
12
module Core::OutputSink
  # Sink that forwards every operation to each sink in a list, in list order.
  class CombinedSink < Core::OutputSink::BaseSink
    # @param sinks [Array] the sinks that receive every forwarded call
    def initialize(sinks = [])
      @sinks = sinks
    end

    # Forwards a single document to every sink.
    def ingest(document)
      fan_out { |target| target.ingest(document) }
    end

    # Asks every sink to flush, passing the same size through.
    def flush(size: nil)
      fan_out { |target| target.flush(size: size) }
    end

    # Forwards a collection of documents to every sink.
    def ingest_multiple(documents)
      fan_out { |target| target.ingest_multiple(documents) }
    end

    # Forwards a single id deletion to every sink.
    def delete(id)
      fan_out { |target| target.delete(id) }
    end

    # Forwards a multi-id deletion to every sink.
    def delete_multiple(ids)
      fan_out { |target| target.delete_multiple(ids) }
    end

    private

    # Runs the given operation once per sink and returns the sink list,
    # exactly as iterating the list directly would.
    def fan_out(&operation)
      @sinks.each(&operation)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink'
10
+ require 'utility/logger'
11
+
12
module Core::OutputSink
  # Sink that writes every operation to standard output instead of storing it.
  class ConsoleSink < Core::OutputSink::BaseSink
    # Prints a header followed by the document itself.
    def ingest(document)
      section('Got a single document:') { document }
    end

    # Prints a header followed by the requested flush size.
    def flush(size: nil)
      section('Flushing') { "Flush size: #{size}" }
    end

    # Prints a header followed by the documents.
    def ingest_multiple(documents)
      section('Got multiple documents:') { documents }
    end

    # Prints a header naming the id, then the id again on its own line.
    def delete(id)
      section("Deleting single id: #{id}") { id }
    end

    # Prints a header naming the ids, then the ids again.
    def delete_multiple(ids)
      section("Deleting several ids: #{ids}") { ids }
    end

    private

    # Prints the framed header first, then the payload. The payload comes from
    # a block so it is only evaluated after the header has been written.
    def section(header)
      print_header(header)
      puts(yield)
    end

    # Prints the horizontal rule used to frame headers.
    def print_delim
      puts('----------------------------------------------------')
    end

    # Prints the header text framed by a rule above and below.
    def print_header(header)
      print_delim
      puts(header)
      print_delim
    end
  end
end
@@ -0,0 +1,74 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/numeric/time'
10
+ require 'app/config'
11
+ require 'core/output_sink/base_sink'
12
+ require 'utility/es_client'
13
+ require 'utility/logger'
14
+
15
module Core::OutputSink
  # Sink that queues index/delete operations in memory and sends them to the
  # configured client with bulk requests once enough have accumulated.
  class EsSink < Core::OutputSink::BaseSink
    # @param index_name [String] index that queued operations target
    # @param request_pipeline [Object] value passed as :pipeline on every bulk request
    # @param flush_threshold [Integer] queue length that triggers an automatic flush,
    #   and the default batch size used by #flush
    def initialize(index_name, request_pipeline, flush_threshold = 50)
      super()
      @client = Utility::EsClient.new(App::Config[:elasticsearch])
      @index_name = index_name
      @request_pipeline = request_pipeline
      @operation_queue = []
      @flush_threshold = flush_threshold
    end

    # Queues an index operation for the document, keyed by document[:id].
    # Blank documents are ignored.
    def ingest(document)
      return if document.blank?

      @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
      flush if ready_to_flush?
    end

    # Queues a delete operation for the given id. A nil id is ignored.
    def delete(doc_id)
      return if doc_id.nil?

      @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
      flush if ready_to_flush?
    end

    # Sends every queued operation, in the order it was queued, in batches.
    #
    # @param size [Integer, nil] batch size; defaults to the flush threshold
    def flush(size: nil)
      # A batch size below 1 would never drain the queue (taking 0 elements
      # makes no progress, a negative count raises), so use at least 1.
      batch_size = [size || @flush_threshold, 1].max

      # Take batches from the FRONT of the queue so an index followed by a
      # delete of the same id is applied in that order even across batches.
      until @operation_queue.empty?
        send_data(@operation_queue.shift(batch_size))
      end
    end

    # Queues an index operation for each document. A nil collection is a no-op.
    def ingest_multiple(documents)
      Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
      documents&.each { |doc| ingest(doc) }
    end

    # Queues a delete operation for each id. A nil collection is a no-op.
    def delete_multiple(ids)
      Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
      ids&.each { |id| delete(id) }
    end

    private

    attr_accessor :index_name

    # Sends one batch of operations as a single bulk request and logs its size.
    def send_data(ops)
      return if ops.empty?

      @client.bulk(:body => ops, :pipeline => @request_pipeline)
      Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
    end

    # True once the queue has reached the configured threshold.
    def ready_to_flush?
      @operation_queue.size >= @flush_threshold
    end
  end
end