connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -8
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/config.rb +3 -0
  5. data/lib/app/dispatcher.rb +44 -17
  6. data/lib/app/preflight_check.rb +11 -0
  7. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  8. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  9. data/lib/connectors/base/connector.rb +43 -14
  10. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  11. data/lib/connectors/example/connector.rb +6 -0
  12. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  13. data/lib/connectors/gitlab/connector.rb +6 -1
  14. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  15. data/lib/connectors/mongodb/connector.rb +47 -43
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  17. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  18. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  19. data/lib/connectors/sync_status.rb +6 -1
  20. data/lib/connectors/tolerable_error_helper.rb +43 -0
  21. data/lib/connectors_app/// +13 -0
  22. data/lib/core/configuration.rb +3 -1
  23. data/lib/core/connector_job.rb +210 -0
  24. data/lib/core/connector_settings.rb +52 -16
  25. data/lib/core/elastic_connector_actions.rb +320 -59
  26. data/lib/core/filtering/post_process_engine.rb +39 -0
  27. data/lib/core/filtering/post_process_result.rb +27 -0
  28. data/lib/core/filtering/simple_rule.rb +141 -0
  29. data/lib/core/filtering/validation_job_runner.rb +53 -0
  30. data/lib/core/filtering/validation_status.rb +17 -0
  31. data/lib/core/filtering.rb +17 -0
  32. data/lib/core/ingestion/es_sink.rb +118 -0
  33. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  34. data/lib/core/jobs/consumer.rb +132 -0
  35. data/lib/core/jobs/producer.rb +26 -0
  36. data/lib/core/scheduler.rb +40 -10
  37. data/lib/core/single_scheduler.rb +1 -1
  38. data/lib/core/sync_job_runner.rb +80 -16
  39. data/lib/core.rb +4 -0
  40. data/lib/utility/bulk_queue.rb +87 -0
  41. data/lib/utility/constants.rb +7 -0
  42. data/lib/utility/error_monitor.rb +108 -0
  43. data/lib/utility/errors.rb +0 -12
  44. data/lib/utility/filtering.rb +22 -0
  45. data/lib/utility/logger.rb +1 -1
  46. data/lib/utility.rb +11 -4
  47. metadata +31 -12
  48. data/lib/core/output_sink/base_sink.rb +0 -33
  49. data/lib/core/output_sink/combined_sink.rb +0 -38
  50. data/lib/core/output_sink/console_sink.rb +0 -51
  51. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -1,51 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class ConsoleSink < Core::OutputSink::BaseSink
14
- def ingest(document)
15
- print_header 'Got a single document:'
16
- puts document
17
- end
18
-
19
- def flush(size: nil)
20
- print_header 'Flushing'
21
- puts "Flush size: #{size}"
22
- end
23
-
24
- def ingest_multiple(documents)
25
- print_header 'Got multiple documents:'
26
- puts documents
27
- end
28
-
29
- def delete(id)
30
- print_header "Deleting single id: #{id}"
31
- puts id
32
- end
33
-
34
- def delete_multiple(ids)
35
- print_header "Deleting several ids: #{ids}"
36
- puts ids
37
- end
38
-
39
- private
40
-
41
- def print_delim
42
- puts '----------------------------------------------------'
43
- end
44
-
45
- def print_header(header)
46
- print_delim
47
- puts header
48
- print_delim
49
- end
50
- end
51
- end
@@ -1,74 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/numeric/time'
10
- require 'app/config'
11
- require 'core/output_sink/base_sink'
12
- require 'utility/es_client'
13
- require 'utility/logger'
14
-
15
- module Core::OutputSink
16
- class EsSink < Core::OutputSink::BaseSink
17
- def initialize(index_name, request_pipeline, flush_threshold = 50)
18
- super()
19
- @client = Utility::EsClient.new(App::Config[:elasticsearch])
20
- @index_name = index_name
21
- @request_pipeline = request_pipeline
22
- @operation_queue = []
23
- @flush_threshold = flush_threshold
24
- end
25
-
26
- def ingest(document)
27
- return if document.blank?
28
-
29
- @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
30
- flush if ready_to_flush?
31
- end
32
-
33
- def delete(doc_id)
34
- return if doc_id.nil?
35
-
36
- @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
37
- flush if ready_to_flush?
38
- end
39
-
40
- def flush(size: nil)
41
- flush_size = size || @flush_threshold
42
-
43
- while @operation_queue.any?
44
- data_to_flush = @operation_queue.pop(flush_size)
45
- send_data(data_to_flush)
46
- end
47
- end
48
-
49
- def ingest_multiple(documents)
50
- Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
51
- documents.each { |doc| ingest(doc) }
52
- end
53
-
54
- def delete_multiple(ids)
55
- Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
56
- ids.each { |id| delete(id) }
57
- end
58
-
59
- private
60
-
61
- attr_accessor :index_name
62
-
63
- def send_data(ops)
64
- return if ops.empty?
65
-
66
- @client.bulk(:body => ops, :pipeline => @request_pipeline)
67
- Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
68
- end
69
-
70
- def ready_to_flush?
71
- @operation_queue.size >= @flush_threshold
72
- end
73
- end
74
- end