connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/dispatcher.rb +12 -0
  4. data/lib/app/preflight_check.rb +11 -0
  5. data/lib/connectors/base/connector.rb +19 -12
  6. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  7. data/lib/connectors/example/connector.rb +15 -0
  8. data/lib/connectors/gitlab/connector.rb +15 -1
  9. data/lib/connectors/mongodb/connector.rb +55 -36
  10. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  11. data/lib/core/configuration.rb +3 -1
  12. data/lib/core/connector_job.rb +137 -0
  13. data/lib/core/connector_settings.rb +24 -11
  14. data/lib/core/elastic_connector_actions.rb +263 -24
  15. data/lib/core/filtering/post_process_engine.rb +39 -0
  16. data/lib/core/filtering/post_process_result.rb +27 -0
  17. data/lib/core/filtering/simple_rule.rb +141 -0
  18. data/lib/core/filtering/validation_job_runner.rb +53 -0
  19. data/lib/core/filtering/validation_status.rb +17 -0
  20. data/lib/core/filtering.rb +17 -0
  21. data/lib/core/ingestion/es_sink.rb +59 -0
  22. data/lib/core/ingestion/ingester.rb +90 -0
  23. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  24. data/lib/core/scheduler.rb +40 -10
  25. data/lib/core/sync_job_runner.rb +65 -17
  26. data/lib/core.rb +2 -0
  27. data/lib/utility/bulk_queue.rb +85 -0
  28. data/lib/utility/constants.rb +2 -0
  29. data/lib/utility/filtering.rb +22 -0
  30. data/lib/utility/logger.rb +2 -1
  31. data/lib/utility.rb +5 -4
  32. metadata +16 -7
  33. data/lib/core/output_sink/base_sink.rb +0 -33
  34. data/lib/core/output_sink/combined_sink.rb +0 -38
  35. data/lib/core/output_sink/console_sink.rb +0 -51
  36. data/lib/core/output_sink/es_sink.rb +0 -74
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221104T200814Z
4
+ version: 8.6.0.4.pre.20221114T233727Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-04 00:00:00.000000000 Z
11
+ date: 2022-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -398,6 +398,7 @@ files:
398
398
  - lib/connectors/base/adapter.rb
399
399
  - lib/connectors/base/connector.rb
400
400
  - lib/connectors/base/custom_client.rb
401
+ - lib/connectors/base/simple_rules_parser.rb
401
402
  - lib/connectors/connector_status.rb
402
403
  - lib/connectors/crawler/scheduler.rb
403
404
  - lib/connectors/example/attachments/first_attachment.txt
@@ -409,21 +410,27 @@ files:
409
410
  - lib/connectors/gitlab/custom_client.rb
410
411
  - lib/connectors/gitlab/extractor.rb
411
412
  - lib/connectors/mongodb/connector.rb
413
+ - lib/connectors/mongodb/mongo_rules_parser.rb
412
414
  - lib/connectors/registry.rb
413
415
  - lib/connectors/sync_status.rb
414
416
  - lib/connectors_service.rb
415
417
  - lib/connectors_utility.rb
416
418
  - lib/core.rb
417
419
  - lib/core/configuration.rb
420
+ - lib/core/connector_job.rb
418
421
  - lib/core/connector_settings.rb
419
422
  - lib/core/elastic_connector_actions.rb
423
+ - lib/core/filtering.rb
424
+ - lib/core/filtering/post_process_engine.rb
425
+ - lib/core/filtering/post_process_result.rb
426
+ - lib/core/filtering/simple_rule.rb
427
+ - lib/core/filtering/validation_job_runner.rb
428
+ - lib/core/filtering/validation_status.rb
420
429
  - lib/core/heartbeat.rb
430
+ - lib/core/ingestion.rb
431
+ - lib/core/ingestion/es_sink.rb
432
+ - lib/core/ingestion/ingester.rb
421
433
  - lib/core/native_scheduler.rb
422
- - lib/core/output_sink.rb
423
- - lib/core/output_sink/base_sink.rb
424
- - lib/core/output_sink/combined_sink.rb
425
- - lib/core/output_sink/console_sink.rb
426
- - lib/core/output_sink/es_sink.rb
427
434
  - lib/core/scheduler.rb
428
435
  - lib/core/single_scheduler.rb
429
436
  - lib/core/sync_job_runner.rb
@@ -432,6 +439,7 @@ files:
432
439
  - lib/stubs/connectors/stats.rb
433
440
  - lib/stubs/service_type.rb
434
441
  - lib/utility.rb
442
+ - lib/utility/bulk_queue.rb
435
443
  - lib/utility/common.rb
436
444
  - lib/utility/constants.rb
437
445
  - lib/utility/cron.rb
@@ -443,6 +451,7 @@ files:
443
451
  - lib/utility/es_client.rb
444
452
  - lib/utility/exception_tracking.rb
445
453
  - lib/utility/extension_mapping_util.rb
454
+ - lib/utility/filtering.rb
446
455
  - lib/utility/logger.rb
447
456
  - lib/utility/middleware/basic_auth.rb
448
457
  - lib/utility/middleware/bearer_auth.rb
@@ -1,33 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Core
10
- module OutputSink
11
- class BaseSink
12
- def ingest(_document)
13
- raise 'not implemented'
14
- end
15
-
16
- def ingest_multiple(_documents)
17
- raise 'not implemented'
18
- end
19
-
20
- def delete(_id)
21
- raise 'not implemented'
22
- end
23
-
24
- def delete_multiple(_ids)
25
- raise 'not implemented'
26
- end
27
-
28
- def flush(_size: nil)
29
- raise 'not implemented'
30
- end
31
- end
32
- end
33
- end
@@ -1,38 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink/base_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class CombinedSink < Core::OutputSink::BaseSink
14
- def initialize(sinks = [])
15
- @sinks = sinks
16
- end
17
-
18
- def ingest(document)
19
- @sinks.each { |sink| sink.ingest(document) }
20
- end
21
-
22
- def flush(size: nil)
23
- @sinks.each { |sink| sink.flush(size: size) }
24
- end
25
-
26
- def ingest_multiple(documents)
27
- @sinks.each { |sink| sink.ingest_multiple(documents) }
28
- end
29
-
30
- def delete(id)
31
- @sinks.each { |sink| sink.delete(id) }
32
- end
33
-
34
- def delete_multiple(ids)
35
- @sinks.each { |sink| sink.delete_multiple(ids) }
36
- end
37
- end
38
- end
@@ -1,51 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class ConsoleSink < Core::OutputSink::BaseSink
14
- def ingest(document)
15
- print_header 'Got a single document:'
16
- puts document
17
- end
18
-
19
- def flush(size: nil)
20
- print_header 'Flushing'
21
- puts "Flush size: #{size}"
22
- end
23
-
24
- def ingest_multiple(documents)
25
- print_header 'Got multiple documents:'
26
- puts documents
27
- end
28
-
29
- def delete(id)
30
- print_header "Deleting single id: #{id}"
31
- puts id
32
- end
33
-
34
- def delete_multiple(ids)
35
- print_header "Deleting several ids: #{ids}"
36
- puts ids
37
- end
38
-
39
- private
40
-
41
- def print_delim
42
- puts '----------------------------------------------------'
43
- end
44
-
45
- def print_header(header)
46
- print_delim
47
- puts header
48
- print_delim
49
- end
50
- end
51
- end
@@ -1,74 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/numeric/time'
10
- require 'app/config'
11
- require 'core/output_sink/base_sink'
12
- require 'utility/es_client'
13
- require 'utility/logger'
14
-
15
- module Core::OutputSink
16
- class EsSink < Core::OutputSink::BaseSink
17
- def initialize(index_name, request_pipeline, flush_threshold = 50)
18
- super()
19
- @client = Utility::EsClient.new(App::Config[:elasticsearch])
20
- @index_name = index_name
21
- @request_pipeline = request_pipeline
22
- @operation_queue = []
23
- @flush_threshold = flush_threshold
24
- end
25
-
26
- def ingest(document)
27
- return if document.blank?
28
-
29
- @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
30
- flush if ready_to_flush?
31
- end
32
-
33
- def delete(doc_id)
34
- return if doc_id.nil?
35
-
36
- @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
37
- flush if ready_to_flush?
38
- end
39
-
40
- def flush(size: nil)
41
- flush_size = size || @flush_threshold
42
-
43
- while @operation_queue.any?
44
- data_to_flush = @operation_queue.pop(flush_size)
45
- send_data(data_to_flush)
46
- end
47
- end
48
-
49
- def ingest_multiple(documents)
50
- Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
51
- documents.each { |doc| ingest(doc) }
52
- end
53
-
54
- def delete_multiple(ids)
55
- Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
56
- ids.each { |id| delete(id) }
57
- end
58
-
59
- private
60
-
61
- attr_accessor :index_name
62
-
63
- def send_data(ops)
64
- return if ops.empty?
65
-
66
- @client.bulk(:body => ops, :pipeline => @request_pipeline)
67
- Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
68
- end
69
-
70
- def ready_to_flush?
71
- @operation_queue.size >= @flush_threshold
72
- end
73
- end
74
- end