connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/dispatcher.rb +42 -17
  5. data/lib/app/preflight_check.rb +11 -0
  6. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  7. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  8. data/lib/connectors/base/connector.rb +43 -14
  9. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  10. data/lib/connectors/example/connector.rb +6 -0
  11. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  12. data/lib/connectors/gitlab/connector.rb +6 -1
  13. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  14. data/lib/connectors/mongodb/connector.rb +47 -43
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  17. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  18. data/lib/connectors/sync_status.rb +6 -1
  19. data/lib/connectors/tolerable_error_helper.rb +43 -0
  20. data/lib/core/configuration.rb +3 -1
  21. data/lib/core/connector_job.rb +210 -0
  22. data/lib/core/connector_settings.rb +52 -16
  23. data/lib/core/elastic_connector_actions.rb +320 -59
  24. data/lib/core/filtering/post_process_engine.rb +39 -0
  25. data/lib/core/filtering/post_process_result.rb +27 -0
  26. data/lib/core/filtering/simple_rule.rb +141 -0
  27. data/lib/core/filtering/validation_job_runner.rb +53 -0
  28. data/lib/core/filtering/validation_status.rb +17 -0
  29. data/lib/core/filtering.rb +17 -0
  30. data/lib/core/ingestion/es_sink.rb +118 -0
  31. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  32. data/lib/core/jobs/consumer.rb +114 -0
  33. data/lib/core/jobs/producer.rb +26 -0
  34. data/lib/core/scheduler.rb +40 -10
  35. data/lib/core/single_scheduler.rb +1 -1
  36. data/lib/core/sync_job_runner.rb +72 -16
  37. data/lib/core.rb +4 -0
  38. data/lib/utility/bulk_queue.rb +85 -0
  39. data/lib/utility/constants.rb +2 -0
  40. data/lib/utility/error_monitor.rb +108 -0
  41. data/lib/utility/errors.rb +0 -12
  42. data/lib/utility/filtering.rb +22 -0
  43. data/lib/utility/logger.rb +1 -1
  44. data/lib/utility.rb +11 -4
  45. metadata +25 -7
  46. data/lib/core/output_sink/base_sink.rb +0 -33
  47. data/lib/core/output_sink/combined_sink.rb +0 -38
  48. data/lib/core/output_sink/console_sink.rb +0 -51
  49. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -1,74 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/numeric/time'
10
- require 'app/config'
11
- require 'core/output_sink/base_sink'
12
- require 'utility/es_client'
13
- require 'utility/logger'
14
-
15
module Core::OutputSink
  # Sink that buffers index/delete operations in memory and sends them to
  # Elasticsearch in bulk requests once enough operations have been queued.
  class EsSink < Core::OutputSink::BaseSink
    # index_name       - name of the index every queued operation targets.
    # request_pipeline - forwarded as the :pipeline option of each bulk request.
    # flush_threshold  - queue size at which ingest/delete flush automatically;
    #                    also the default batch size used by #flush.
    def initialize(index_name, request_pipeline, flush_threshold = 50)
      super()
      @client = Utility::EsClient.new(App::Config[:elasticsearch])
      @index_name = index_name
      @request_pipeline = request_pipeline
      @operation_queue = []
      @flush_threshold = flush_threshold
    end

    # Queues an index operation for +document+, keyed by document[:id].
    # Blank documents are ignored. Flushes when the queue reaches the threshold.
    def ingest(document)
      return if document.blank?

      @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
      flush if ready_to_flush?
    end

    # Queues a delete operation for +doc_id+. A nil id is ignored.
    # Flushes when the queue reaches the threshold.
    def delete(doc_id)
      return if doc_id.nil?

      @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
      flush if ready_to_flush?
    end

    # Sends every queued operation in batches of +size+ (defaults to the
    # flush threshold) until the queue is empty.
    #
    # Raises ArgumentError when there is something to flush and the batch
    # size is not positive.
    def flush(size: nil)
      return if @operation_queue.empty?

      flush_size = size || @flush_threshold
      # A zero batch size would never drain the queue, so fail loudly instead
      # of spinning forever.
      raise ArgumentError, "flush size must be positive, got #{flush_size}" unless flush_size.positive?

      # Take batches from the FRONT of the queue so operations reach the index
      # in the order they were enqueued (an index followed by a delete of the
      # same id must not be sent in reverse when they fall in different batches).
      send_data(@operation_queue.shift(flush_size)) until @operation_queue.empty?
    end

    # Queues an index operation for each document. A nil collection is a no-op,
    # consistent with the nil-safe size lookup in the log line.
    def ingest_multiple(documents)
      Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
      documents&.each { |doc| ingest(doc) }
    end

    # Queues a delete operation for each id. A nil collection is a no-op,
    # consistent with the nil-safe size lookup in the log line.
    def delete_multiple(ids)
      Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
      ids&.each { |id| delete(id) }
    end

    private

    attr_accessor :index_name

    # Issues one bulk request containing +ops+; empty batches are skipped.
    # NOTE(review): the bulk response is not inspected here, so per-item
    # failures reported in the response body would still be logged as
    # applied - confirm whether Utility::EsClient raises on them.
    def send_data(ops)
      return if ops.empty?

      @client.bulk(:body => ops, :pipeline => @request_pipeline)
      Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
    end

    # True once the queue holds at least +@flush_threshold+ operations.
    def ready_to_flush?
      @operation_queue.size >= @flush_threshold
    end
  end
end