connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +10 -8
  3. data/lib/app/config.rb +6 -1
  4. data/lib/app/console_app.rb +1 -1
  5. data/lib/app/dispatcher.rb +18 -3
  6. data/lib/connectors/base/connector.rb +39 -22
  7. data/lib/connectors/crawler/scheduler.rb +36 -0
  8. data/lib/connectors/example/connector.rb +2 -2
  9. data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
  10. data/lib/connectors/gitlab/connector.rb +4 -4
  11. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
  12. data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
  13. data/lib/connectors/mongodb/connector.rb +66 -56
  14. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
  17. data/lib/connectors/registry.rb +1 -1
  18. data/lib/connectors/tolerable_error_helper.rb +5 -1
  19. data/lib/connectors_utility.rb +6 -3
  20. data/lib/core/configuration.rb +13 -1
  21. data/lib/core/connector_job.rb +48 -7
  22. data/lib/core/connector_settings.rb +52 -20
  23. data/lib/core/elastic_connector_actions.rb +54 -38
  24. data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
  25. data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
  26. data/lib/core/filtering/filter_validator.rb +103 -0
  27. data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
  28. data/lib/core/filtering/post_process_engine.rb +2 -2
  29. data/lib/core/filtering/processing_stage.rb +20 -0
  30. data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
  31. data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
  32. data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
  33. data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
  34. data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
  35. data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
  36. data/lib/core/filtering/transform/filter_transformer.rb +26 -0
  37. data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
  38. data/lib/core/filtering/transform/transformation_target.rb +10 -0
  39. data/lib/core/filtering/validation_job_runner.rb +1 -3
  40. data/lib/core/filtering.rb +5 -3
  41. data/lib/core/job_cleanup.rb +66 -0
  42. data/lib/core/jobs/consumer.rb +62 -64
  43. data/lib/core/jobs/producer.rb +3 -0
  44. data/lib/core/scheduler.rb +67 -52
  45. data/lib/core/sync_job_runner.rb +170 -83
  46. data/lib/core.rb +1 -0
  47. data/lib/utility/bulk_queue.rb +1 -1
  48. data/lib/utility/constants.rb +0 -2
  49. data/lib/utility/error_monitor.rb +26 -5
  50. data/lib/utility/es_client.rb +4 -0
  51. data/lib/utility/filtering.rb +4 -0
  52. metadata +32 -21
  53. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  54. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  55. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aaca309c6ef67d232a19c895ce21b1c9a0b75b1aed64bc90e4676ec5647c43ff
4
- data.tar.gz: 1cc77648386b48bd863319ca309167bfce4a7fda5f50ceff18b659c134a88fa1
3
+ metadata.gz: 3c2c9d24a514189f2efb9fbfdcba52a2ee49d40dec97025b6524e1562087e750
4
+ data.tar.gz: f3ea6369ff7cbab5983178df08d5aa168f72cd66325c23c5dd6fe839688db1f8
5
5
  SHA512:
6
- metadata.gz: 727ef952b3c32f38c56aaa05e6514ad9124e0086e0d898b02843932f1fa2889d870f099890e957c69e36d532bb65fb60f1996e5ec42233c09e8c19868767cc97
7
- data.tar.gz: 66d47ed3e631562e080c01030eb1d5a0ea44f10f6d317d3181db5f89ec02f3d7ddbeeb486567a09e058c56b75cd225bb47dc31cf615762ce9602c56c69b27d24
6
+ metadata.gz: 2b80a16d7a1d60fe19d1ac95d9cd8a764629b256632fc205342513654fbbceae4b70d73f4f8ab8b2be808ed6d36f7d62773a32326a01e7579a15b6d4dbdc00f0
7
+ data.tar.gz: cfe21c6e49e6a95af8820a2a89b5dd6dc1b5b080cf174dd90c575bc5b390047334b27a4df7f01a3c26d19990162758b4f82cdb17ad01d048057bd62a3ded8863
@@ -1,11 +1,10 @@
1
1
  # general metadata
2
- version: 8.7.0.0-20221117T010623Z
3
- repository: https://github.com/elastic/connectors-ruby.git
4
- revision: 294214a26b0fe9a4347763b01de681c336e8daae
2
+ version: 8.11.0.0
3
+ repository: git@github.com:elastic/connectors-ruby.git
4
+ revision: b6c38305e5f7297064f4cf1cbda20f892483f267
5
5
  elasticsearch:
6
- cloud_id: CHANGEME
7
6
  hosts: http://localhost:9200
8
- api_key: CHANGEME
7
+ api_key: QWFHdV9JY0JDZDEzMU8tU3N1Y2Q6YXRyLTV5SlBTUENKYXcta1Yxc0RVZw==
9
8
  retry_on_failure: 3
10
9
  request_timeout: 120
11
10
  disable_warnings: true
@@ -16,10 +15,13 @@ thread_pool:
16
15
  max_threads: 5
17
16
  max_queue: 100
18
17
  log_level: info
19
- ecs_logging: true
18
+ ecs_logging: false
20
19
  poll_interval: 3
21
20
  termination_timeout: 60
22
21
  heartbeat_interval: 1800
22
+ job_cleanup_interval: 300
23
23
  native_mode: true
24
- connector_id: CHANGEME
25
- service_type: CHANGEME
24
+ connector_id: _6Go_IcBCd131O-Ss-YH
25
+ service_type: mongodb
26
+ max_ingestion_queue_size: 500
27
+ max_ingestion_queue_bytes: 5242880
data/lib/app/config.rb CHANGED
@@ -37,6 +37,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
37
37
  optional(:log).value(:bool?)
38
38
  optional(:ca_fingerprint).value(:string)
39
39
  optional(:transport_options).value(:hash)
40
+ optional(:headers).value(:hash)
40
41
  end
41
42
 
42
43
  optional(:thread_pool).hash do
@@ -54,6 +55,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
54
55
  optional(:poll_interval).value(:integer)
55
56
  optional(:termination_timeout).value(:integer)
56
57
  optional(:heartbeat_interval).value(:integer)
58
+ optional(:job_cleanup_interval).value(:integer)
57
59
 
58
60
  optional(:max_ingestion_queue_size).value(:integer) # items
59
61
  optional(:max_ingestion_queue_bytes).value(:integer) # bytes
@@ -111,6 +113,8 @@ module App
111
113
  return nil
112
114
  end
113
115
 
116
+ headers = ent_search_config['elasticsearch.headers'] || ent_search_config.dig('elasticsearch', 'headers')
117
+
114
118
  {
115
119
  :hosts => [
116
120
  {
@@ -120,7 +124,8 @@ module App
120
124
  host: uri.host,
121
125
  port: uri.port
122
126
  }
123
- ]
127
+ ],
128
+ :headers => headers
124
129
  }
125
130
  end
126
131
 
@@ -89,7 +89,7 @@ module App
89
89
  def enable_scheduling
90
90
  return unless connector_registered?
91
91
 
92
- previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.scheduling_settings&.fetch(:interval, nil)
92
+ previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.full_sync_scheduling&.fetch(:interval, nil)
93
93
  if previous_schedule.present?
94
94
  puts "Please enter a valid crontab expression for scheduling. Previous schedule was: #{previous_schedule}."
95
95
  else
@@ -21,6 +21,7 @@ module App
21
21
  MIN_THREADS = (App::Config.dig(:thread_pool, :min_threads) || 0).to_i
22
22
  MAX_THREADS = (App::Config.dig(:thread_pool, :max_threads) || 5).to_i
23
23
  MAX_QUEUE = (App::Config.dig(:thread_pool, :max_queue) || 100).to_i
24
+ JOB_CLEANUP_INTERVAL = (App::Config.job_cleanup_interval || 60 * 5).to_i
24
25
 
25
26
  @running = Concurrent::AtomicBoolean.new(false)
26
27
 
@@ -28,16 +29,16 @@ module App
28
29
  def start!
29
30
  running!
30
31
  Utility::Logger.info("Starting connector service in #{App::Config.native_mode ? 'native' : 'non-native'} mode...")
31
-
32
+ start_job_cleanup_task!
32
33
  # start sync jobs consumer
33
34
  start_consumer!
34
-
35
35
  start_polling_jobs!
36
36
  end
37
37
 
38
38
  def shutdown!
39
39
  Utility::Logger.info("Shutting down connector service with pool [#{pool.class}]...")
40
40
  running.make_false
41
+ job_cleanup_timer.shutdown
41
42
  scheduler.shutdown
42
43
  pool.shutdown
43
44
  pool.wait_for_termination(TERMINATION_TIMEOUT)
@@ -70,11 +71,25 @@ module App
70
71
  end
71
72
  end
72
73
 
74
+ def job_cleanup_timer
75
+ @job_cleanup_timer ||= Concurrent::TimerTask.new(:execution_interval => JOB_CLEANUP_INTERVAL, :run_now => true) do
76
+ connector_id = App::Config.native_mode ? nil : App::Config.connector_id
77
+ Core::JobCleanUp.execute(connector_id)
78
+ end
79
+ end
80
+
81
+ def start_job_cleanup_task!
82
+ job_cleanup_timer.execute
83
+ end
84
+
73
85
  def start_polling_jobs!
74
86
  scheduler.when_triggered do |connector_settings, task|
75
87
  case task
76
88
  when :sync
77
- # update connector sync_now flag
89
+ # TODO: #update_connector_sync_now should be moved to Core::ConnectorSettings,
90
+ # there should not be any business logic related code in Core::ElasticConnectorActions.
91
+ # #update_connector_sync_now should not update `last_synced` after https://github.com/elastic/enterprise-search-team/issues/3366 is resolved,
92
+ # schedule should not based on `last_synced`
78
93
  Core::ElasticConnectorActions.update_connector_sync_now(connector_settings.id, false)
79
94
 
80
95
  Core::Jobs::Producer.enqueue_job(job_type: :sync, connector_settings: connector_settings)
@@ -9,9 +9,15 @@
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'app/config'
11
11
  require 'bson'
12
- require 'connectors/base/advanced_snippet_validator'
13
12
  require 'core/ingestion'
14
13
  require 'connectors/tolerable_error_helper'
14
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
15
+ require 'core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator'
16
+ require 'core/filtering/simple_rules/validation/single_rule_against_schema_validator'
17
+ require 'core/filtering/transform/filter_transformer_facade'
18
+ require 'core/filtering/transform/transformation_target'
19
+ require 'core/filtering/filter_validator'
20
+ require 'core/filtering/processing_stage'
15
21
  require 'core/filtering/validation_status'
16
22
  require 'utility'
17
23
  require 'utility/filtering'
@@ -38,40 +44,55 @@ module Connectors
38
44
 
39
45
  def self.kibana_features
40
46
  [
41
- Utility::Constants::FILTERING_RULES_FEATURE,
42
- Utility::Constants::FILTERING_ADVANCED_FEATURE
47
+ { :feature => :sync_rules, :subfeature => :basic, :enabled => true },
48
+ { :feature => :sync_rules, :subfeature => :advanced, :enabled => true }
43
49
  ]
44
50
  end
45
51
 
46
- def self.advanced_snippet_validator
47
- AdvancedSnippetValidator
52
+ def self.advanced_snippet_validators
53
+ Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
48
54
  end
49
55
 
50
- def self.validate_filtering(filtering = {})
51
- # nothing to validate
52
- return { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] } unless filtering.present?
56
+ def self.simple_rules_validators
57
+ {
58
+ Core::Filtering::ProcessingStage::ALL => [
59
+ Core::Filtering::SimpleRules::Validation::SingleRuleAgainstSchemaValidator,
60
+ Core::Filtering::SimpleRules::Validation::NoConflictingPoliciesRulesValidator
61
+ ]
62
+ }
63
+ end
53
64
 
54
- filter = Utility::Filtering.extract_filter(filtering)
55
- advanced_snippet = filter.dig(:advanced_snippet, :value)
65
+ def self.filter_transformers
66
+ {
67
+ Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [],
68
+ Core::Filtering::Transform::TransformationTarget::RULES => []
69
+ }
70
+ end
56
71
 
57
- snippet_validator_instance = advanced_snippet_validator.new(advanced_snippet)
72
+ def self.validate_filtering(filtering = {})
73
+ filter = Utility::Filtering.extract_filter(filtering)
58
74
 
59
- snippet_validator_instance.is_snippet_valid?
75
+ filter_validator = Core::Filtering::FilterValidator.new(snippet_validator_classes: advanced_snippet_validators,
76
+ rules_validator_classes: simple_rules_validators,
77
+ rules_pre_processing_active: Utility::Filtering.rule_pre_processing_active?(filter))
78
+ filter_validator.is_filter_valid(filter)
60
79
  end
61
80
 
62
81
  attr_reader :rules, :advanced_filter_config
63
82
 
64
- def initialize(configuration: {}, job_description: {})
83
+ def initialize(configuration: {}, job_description: nil)
65
84
  error_monitor = Utility::ErrorMonitor.new
66
85
  @tolerable_error_helper = Connectors::TolerableErrorHelper.new(error_monitor)
67
86
 
68
- @configuration = configuration.dup || {}
69
- @job_description = job_description&.dup || {}
87
+ @configuration = job_description&.configuration&.dup || configuration&.dup || {}
88
+ @job_description = job_description&.dup
70
89
 
71
- filtering = Utility::Filtering.extract_filter(@job_description.dig(:connector, :filtering))
90
+ filter = Utility::Filtering.extract_filter(@job_description&.filtering)
91
+ filter = Core::Filtering::Transform::FilterTransformerFacade.new(filter, self.class.filter_transformers).transform
72
92
 
73
- @rules = filtering[:rules] || []
74
- @advanced_filter_config = filtering.dig(:advanced_snippet, :value) || {}
93
+ @rules = filter[:rules] || []
94
+ # regression bug, we need to keep indifferent access here until we get rid of symbols in the connectors
95
+ @advanced_filter_config = filter[:advanced_snippet]&.with_indifferent_access || {}
75
96
  end
76
97
 
77
98
  def yield_documents; end
@@ -100,10 +121,6 @@ module Connectors
100
121
  false
101
122
  end
102
123
 
103
- def filtering_present?
104
- @advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
105
- end
106
-
107
124
  def metadata
108
125
  {}
109
126
  end
@@ -22,11 +22,47 @@ module Connectors
22
22
  []
23
23
  end
24
24
 
25
+ def when_triggered
26
+ loop do
27
+ connector_settings.each do |cs|
28
+ # crawler only supports :sync
29
+ if sync_triggered?(cs)
30
+ yield cs, :sync, nil
31
+ next
32
+ end
33
+
34
+ schedule_key = custom_schedule_triggered(cs)
35
+ yield cs, :sync, schedule_key if schedule_key
36
+ end
37
+ rescue *Utility::AUTHORIZATION_ERRORS => e
38
+ log_authorization_error(e)
39
+ rescue StandardError => e
40
+ log_standard_error(e)
41
+ ensure
42
+ if @is_shutting_down
43
+ break
44
+ end
45
+ sleep_for_poll_interval
46
+ end
47
+ end
48
+
25
49
  private
26
50
 
27
51
  def connector_registered?(service_type)
28
52
  service_type == 'elastic-crawler'
29
53
  end
54
+
55
+ # custom scheduling has no ordering, so the first-found schedule is returned
56
+ def custom_schedule_triggered(cs)
57
+ cs.custom_scheduling_settings.each do |key, custom_scheduling|
58
+ identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59
+ if schedule_triggered?(custom_scheduling, identifier)
60
+ return key
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
30
66
  end
31
67
  end
32
68
  end
@@ -36,7 +36,7 @@ module Connectors
36
36
  }
37
37
  end
38
38
 
39
- def initialize(configuration: {}, job_description: {})
39
+ def initialize(configuration: {}, job_description: nil)
40
40
  super
41
41
  end
42
42
 
@@ -47,7 +47,7 @@ module Connectors
47
47
  # raise 'something went wrong'
48
48
  end
49
49
 
50
- def self.advanced_snippet_validator
50
+ def self.advanced_snippet_validators
51
51
  ExampleAdvancedSnippetValidator
52
52
  end
53
53
 
@@ -6,13 +6,14 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
10
+ require 'core/filtering/validation_status'
10
11
 
11
12
  module Connectors
12
13
  module Example
13
- class ExampleAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
+ class ExampleAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
14
15
 
15
- def is_snippet_valid?
16
+ def is_snippet_valid
16
17
  # TODO: real filtering validation will follow later
17
18
  errors = [
18
19
  {
@@ -37,16 +37,16 @@ module Connectors
37
37
  }
38
38
  end
39
39
 
40
- def self.advanced_snippet_validator
40
+ def self.advanced_snippet_validators
41
41
  GitLabAdvancedSnippetValidator
42
42
  end
43
43
 
44
- def initialize(configuration: {}, job_description: {})
44
+ def initialize(configuration: {}, job_description: nil)
45
45
  super
46
46
 
47
47
  @extractor = Connectors::GitLab::Extractor.new(
48
- :base_url => configuration.dig(:base_url, :value),
49
- :api_token => configuration.dig(:api_token, :value)
48
+ :base_url => @configuration.dig(:base_url, :value),
49
+ :api_token => @configuration.dig(:api_token, :value)
50
50
  )
51
51
  end
52
52
 
@@ -6,13 +6,13 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
10
10
 
11
11
  module Connectors
12
12
  module GitLab
13
- class GitLabAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
13
+ class GitLabAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
14
14
 
15
- def is_snippet_valid?
15
+ def is_snippet_valid
16
16
  # TODO: real filtering validation will follow later
17
17
  errors = [
18
18
  {
@@ -21,13 +21,11 @@ module Connectors
21
21
  }
22
22
  ]
23
23
 
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
24
+ if @advanced_snippet.present? && !@advanced_snippet.empty?
25
+ { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
+ else
27
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
+ end
31
29
  end
32
30
 
33
31
  end
@@ -4,10 +4,11 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
- module ConnectorsApp
8
- module Errors
9
- INVALID_API_KEY = 'INVALID_API_KEY'
10
- UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
11
- INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
7
+ # frozen_string_literal: true
8
+
9
+ module Connectors
10
+ class JobTriggerMethod
11
+ ON_DEMAND = 'on_demand'
12
+ SCHEDULED = 'scheduled'
12
13
  end
13
14
  end
@@ -8,7 +8,8 @@
8
8
 
9
9
  require 'connectors/base/connector'
10
10
  require 'core/filtering/validation_status'
11
- require 'connectors/mongodb/mongo_rules_parser'
11
+ require 'core/filtering/transform/transformation_target'
12
+ require 'connectors/mongodb/mongo_advanced_snippet_snake_case_transformer'
12
13
  require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
13
14
  require 'mongo'
14
15
  require 'utility'
@@ -19,6 +20,9 @@ module Connectors
19
20
 
20
21
  ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
21
22
 
23
+ AGGREGATE = 'aggregate'
24
+ FIND = 'find'
25
+
22
26
  PAGE_SIZE = 100
23
27
 
24
28
  def self.service_type
@@ -52,19 +56,25 @@ module Connectors
52
56
  }
53
57
  end
54
58
 
55
- def self.advanced_snippet_validator
59
+ def self.advanced_snippet_validators
56
60
  MongoAdvancedSnippetAgainstSchemaValidator
57
61
  end
58
62
 
59
- def initialize(configuration: {}, job_description: {})
63
+ def self.filter_transformers
64
+ {
65
+ Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [MongoAdvancedSnippetSnakeCaseTransformer]
66
+ }
67
+ end
68
+
69
+ def initialize(configuration: {}, job_description: nil)
60
70
  super
61
71
 
62
- @host = configuration.dig(:host, :value)
63
- @database = configuration.dig(:database, :value)
64
- @collection = configuration.dig(:collection, :value)
65
- @user = configuration.dig(:user, :value)
66
- @password = configuration.dig(:password, :value)
67
- @direct_connection = configuration.dig(:direct_connection, :value)
72
+ @host = @configuration.dig(:host, :value)
73
+ @database = @configuration.dig(:database, :value)
74
+ @collection = @configuration.dig(:collection, :value)
75
+ @user = @configuration.dig(:user, :value)
76
+ @password = @configuration.dig(:password, :value)
77
+ @direct_connection = @configuration.dig(:direct_connection, :value)
68
78
  end
69
79
 
70
80
  def yield_documents
@@ -73,44 +83,56 @@ module Connectors
73
83
  # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
74
84
  # It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
75
85
  # during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
76
- cursor, options = create_db_cursor_on_collection(client[@collection])
77
- skip = 0
78
-
79
- found_overall = 0
80
-
81
- # if no overall limit is specified by filtering use -1 to not break ingestion, when no overall limit is specified (found_overall is only increased,
82
- # thus can never reach -1)
83
- overall_limit = Float::INFINITY
86
+ cursor_type, cursor_with_options = create_db_cursor_on_collection(client[@collection])
87
+ cursor, options = cursor_with_options
88
+
89
+ case cursor_type
90
+ when FIND
91
+ skip = 0
92
+ found_overall = 0
93
+
94
+ # if no overall limit is specified by filtering use -1 to not break ingestion, when no overall limit is specified (found_overall is only increased,
95
+ # thus can never reach -1)
96
+ overall_limit = Float::INFINITY
97
+
98
+ if options.present?
99
+ # there could be a skip parameter defined for filtering
100
+ skip = options.fetch(:skip, skip)
101
+ # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
102
+ overall_limit = options.fetch(:limit, overall_limit)
103
+ end
84
104
 
85
- if options.present?
86
- # there could be a skip parameter defined for filtering
87
- skip = options.fetch(:skip, skip)
88
- # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
89
- overall_limit = options.fetch(:limit, overall_limit)
90
- end
105
+ overall_limit_reached = false
106
+
107
+ loop do
108
+ found_in_page = 0
109
+
110
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
111
+ view = cursor.skip(skip).limit(PAGE_SIZE)
112
+ view.each do |document|
113
+ yield_with_handling_tolerable_errors do
114
+ yield serialize(document)
115
+ found_in_page += 1
116
+ found_overall += 1
117
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
118
+ end
119
+ break if overall_limit_reached
120
+ end
91
121
 
92
- overall_limit_reached = false
122
+ page_was_empty = found_in_page == 0
93
123
 
94
- loop do
95
- found_in_page = 0
124
+ break if page_was_empty || overall_limit_reached
96
125
 
97
- Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
98
- view = cursor.skip(skip).limit(PAGE_SIZE)
99
- view.each do |document|
126
+ skip += PAGE_SIZE
127
+ end
128
+ when AGGREGATE
129
+ cursor.each do |document|
100
130
  yield_with_handling_tolerable_errors do
101
131
  yield serialize(document)
102
- found_in_page += 1
103
- found_overall += 1
104
- overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
105
132
  end
106
- break if overall_limit_reached
107
133
  end
108
-
109
- page_was_empty = found_in_page == 0
110
-
111
- break if page_was_empty || overall_limit_reached
112
-
113
- skip += PAGE_SIZE
134
+ else
135
+ raise "Unknown retrieval function #{cursor_type} for MongoDB."
114
136
  end
115
137
  end
116
138
  end
@@ -118,22 +140,20 @@ module Connectors
118
140
  private
119
141
 
120
142
  def create_db_cursor_on_collection(collection)
121
- return create_find_cursor(collection) if @advanced_filter_config[:find].present?
143
+ return [AGGREGATE, create_aggregate_cursor(collection)] if @advanced_filter_config[:aggregate].present?
122
144
 
123
- return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
145
+ return [FIND, create_find_cursor(collection)] if @advanced_filter_config[:find].present?
124
146
 
125
- return create_simple_rules_cursor(collection) if @rules.present?
126
-
127
- collection.find
147
+ [FIND, collection.find]
128
148
  end
129
149
 
130
150
  def create_aggregate_cursor(collection)
131
151
  aggregate = @advanced_filter_config[:aggregate]
132
152
 
133
- pipeline = aggregate[:pipeline]
153
+ pipeline = aggregate[:pipeline] || []
134
154
  options = extract_options(aggregate)
135
155
 
136
- if !pipeline.nil? && pipeline.empty? && !options.present?
156
+ if pipeline.empty? && options.empty?
137
157
  Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
138
158
  end
139
159
 
@@ -153,16 +173,6 @@ module Connectors
153
173
  [collection.find(filter, options), options]
154
174
  end
155
175
 
156
- def create_simple_rules_cursor(collection)
157
- filter = {}
158
- if @rules.present?
159
- parser = MongoRulesParser.new(@rules)
160
- filter = parser.parse
161
- end
162
- Utility::Logger.info("Filtering with simple rules filter: #{filter}")
163
- filter.present? ? collection.find(filter) : collection.find
164
- end
165
-
166
176
  def extract_options(mongodb_function)
167
177
  mongodb_function[:options].present? ? mongodb_function[:options] : {}
168
178
  end
@@ -6,12 +6,12 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_against_schema_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_against_schema_validator'
10
10
  require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
11
 
12
12
  module Connectors
13
13
  module MongoDB
14
- class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
14
+ class MongoAdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetAgainstSchemaValidator
15
15
 
16
16
  def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
17
17
  super
@@ -25,7 +25,7 @@ module Connectors
25
25
  NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
26
26
  READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
27
27
  STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
28
- MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
28
+ MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.nil? || fields.size <= 1 }
29
29
 
30
30
  AGGREGATION_PIPELINE = lambda { |pipeline|
31
31
  return false unless pipeline.is_a?(Array)
@@ -265,7 +265,8 @@ module Connectors
265
265
 
266
266
  FIND_FILTER = {
267
267
  :name => 'filter',
268
- :type => FILTER
268
+ :type => FILTER,
269
+ :optional => true
269
270
  }
270
271
 
271
272
  FIND = {