connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +10 -8
  3. data/lib/app/config.rb +6 -1
  4. data/lib/app/console_app.rb +1 -1
  5. data/lib/app/dispatcher.rb +18 -3
  6. data/lib/connectors/base/connector.rb +39 -22
  7. data/lib/connectors/crawler/scheduler.rb +36 -0
  8. data/lib/connectors/example/connector.rb +2 -2
  9. data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
  10. data/lib/connectors/gitlab/connector.rb +4 -4
  11. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
  12. data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
  13. data/lib/connectors/mongodb/connector.rb +66 -56
  14. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
  17. data/lib/connectors/registry.rb +1 -1
  18. data/lib/connectors/tolerable_error_helper.rb +5 -1
  19. data/lib/connectors_utility.rb +6 -3
  20. data/lib/core/configuration.rb +13 -1
  21. data/lib/core/connector_job.rb +48 -7
  22. data/lib/core/connector_settings.rb +52 -20
  23. data/lib/core/elastic_connector_actions.rb +54 -38
  24. data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
  25. data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
  26. data/lib/core/filtering/filter_validator.rb +103 -0
  27. data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
  28. data/lib/core/filtering/post_process_engine.rb +2 -2
  29. data/lib/core/filtering/processing_stage.rb +20 -0
  30. data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
  31. data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
  32. data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
  33. data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
  34. data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
  35. data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
  36. data/lib/core/filtering/transform/filter_transformer.rb +26 -0
  37. data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
  38. data/lib/core/filtering/transform/transformation_target.rb +10 -0
  39. data/lib/core/filtering/validation_job_runner.rb +1 -3
  40. data/lib/core/filtering.rb +5 -3
  41. data/lib/core/job_cleanup.rb +66 -0
  42. data/lib/core/jobs/consumer.rb +62 -64
  43. data/lib/core/jobs/producer.rb +3 -0
  44. data/lib/core/scheduler.rb +67 -52
  45. data/lib/core/sync_job_runner.rb +170 -83
  46. data/lib/core.rb +1 -0
  47. data/lib/utility/bulk_queue.rb +1 -1
  48. data/lib/utility/constants.rb +0 -2
  49. data/lib/utility/error_monitor.rb +26 -5
  50. data/lib/utility/es_client.rb +4 -0
  51. data/lib/utility/filtering.rb +4 -0
  52. metadata +32 -21
  53. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  54. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  55. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aaca309c6ef67d232a19c895ce21b1c9a0b75b1aed64bc90e4676ec5647c43ff
4
- data.tar.gz: 1cc77648386b48bd863319ca309167bfce4a7fda5f50ceff18b659c134a88fa1
3
+ metadata.gz: 3c2c9d24a514189f2efb9fbfdcba52a2ee49d40dec97025b6524e1562087e750
4
+ data.tar.gz: f3ea6369ff7cbab5983178df08d5aa168f72cd66325c23c5dd6fe839688db1f8
5
5
  SHA512:
6
- metadata.gz: 727ef952b3c32f38c56aaa05e6514ad9124e0086e0d898b02843932f1fa2889d870f099890e957c69e36d532bb65fb60f1996e5ec42233c09e8c19868767cc97
7
- data.tar.gz: 66d47ed3e631562e080c01030eb1d5a0ea44f10f6d317d3181db5f89ec02f3d7ddbeeb486567a09e058c56b75cd225bb47dc31cf615762ce9602c56c69b27d24
6
+ metadata.gz: 2b80a16d7a1d60fe19d1ac95d9cd8a764629b256632fc205342513654fbbceae4b70d73f4f8ab8b2be808ed6d36f7d62773a32326a01e7579a15b6d4dbdc00f0
7
+ data.tar.gz: cfe21c6e49e6a95af8820a2a89b5dd6dc1b5b080cf174dd90c575bc5b390047334b27a4df7f01a3c26d19990162758b4f82cdb17ad01d048057bd62a3ded8863
data/config/connectors.yml CHANGED
@@ -1,11 +1,10 @@
1
1
  # general metadata
2
- version: 8.7.0.0-20221117T010623Z
3
- repository: https://github.com/elastic/connectors-ruby.git
4
- revision: 294214a26b0fe9a4347763b01de681c336e8daae
2
+ version: 8.11.0.0
3
+ repository: git@github.com:elastic/connectors-ruby.git
4
+ revision: b6c38305e5f7297064f4cf1cbda20f892483f267
5
5
  elasticsearch:
6
- cloud_id: CHANGEME
7
6
  hosts: http://localhost:9200
8
- api_key: CHANGEME
7
+ api_key: QWFHdV9JY0JDZDEzMU8tU3N1Y2Q6YXRyLTV5SlBTUENKYXcta1Yxc0RVZw==
9
8
  retry_on_failure: 3
10
9
  request_timeout: 120
11
10
  disable_warnings: true
@@ -16,10 +15,13 @@ thread_pool:
16
15
  max_threads: 5
17
16
  max_queue: 100
18
17
  log_level: info
19
- ecs_logging: true
18
+ ecs_logging: false
20
19
  poll_interval: 3
21
20
  termination_timeout: 60
22
21
  heartbeat_interval: 1800
22
+ job_cleanup_interval: 300
23
23
  native_mode: true
24
- connector_id: CHANGEME
25
- service_type: CHANGEME
24
+ connector_id: _6Go_IcBCd131O-Ss-YH
25
+ service_type: mongodb
26
+ max_ingestion_queue_size: 500
27
+ max_ingestion_queue_bytes: 5242880
data/lib/app/config.rb CHANGED
@@ -37,6 +37,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
37
37
  optional(:log).value(:bool?)
38
38
  optional(:ca_fingerprint).value(:string)
39
39
  optional(:transport_options).value(:hash)
40
+ optional(:headers).value(:hash)
40
41
  end
41
42
 
42
43
  optional(:thread_pool).hash do
@@ -54,6 +55,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
54
55
  optional(:poll_interval).value(:integer)
55
56
  optional(:termination_timeout).value(:integer)
56
57
  optional(:heartbeat_interval).value(:integer)
58
+ optional(:job_cleanup_interval).value(:integer)
57
59
 
58
60
  optional(:max_ingestion_queue_size).value(:integer) # items
59
61
  optional(:max_ingestion_queue_bytes).value(:integer) # bytes
@@ -111,6 +113,8 @@ module App
111
113
  return nil
112
114
  end
113
115
 
116
+ headers = ent_search_config['elasticsearch.headers'] || ent_search_config.dig('elasticsearch', 'headers')
117
+
114
118
  {
115
119
  :hosts => [
116
120
  {
@@ -120,7 +124,8 @@ module App
120
124
  host: uri.host,
121
125
  port: uri.port
122
126
  }
123
- ]
127
+ ],
128
+ :headers => headers
124
129
  }
125
130
  end
126
131
 
data/lib/app/console_app.rb CHANGED
@@ -89,7 +89,7 @@ module App
89
89
  def enable_scheduling
90
90
  return unless connector_registered?
91
91
 
92
- previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.scheduling_settings&.fetch(:interval, nil)
92
+ previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.full_sync_scheduling&.fetch(:interval, nil)
93
93
  if previous_schedule.present?
94
94
  puts "Please enter a valid crontab expression for scheduling. Previous schedule was: #{previous_schedule}."
95
95
  else
data/lib/app/dispatcher.rb CHANGED
@@ -21,6 +21,7 @@ module App
21
21
  MIN_THREADS = (App::Config.dig(:thread_pool, :min_threads) || 0).to_i
22
22
  MAX_THREADS = (App::Config.dig(:thread_pool, :max_threads) || 5).to_i
23
23
  MAX_QUEUE = (App::Config.dig(:thread_pool, :max_queue) || 100).to_i
24
+ JOB_CLEANUP_INTERVAL = (App::Config.job_cleanup_interval || 60 * 5).to_i
24
25
 
25
26
  @running = Concurrent::AtomicBoolean.new(false)
26
27
 
@@ -28,16 +29,16 @@ module App
28
29
  def start!
29
30
  running!
30
31
  Utility::Logger.info("Starting connector service in #{App::Config.native_mode ? 'native' : 'non-native'} mode...")
31
-
32
+ start_job_cleanup_task!
32
33
  # start sync jobs consumer
33
34
  start_consumer!
34
-
35
35
  start_polling_jobs!
36
36
  end
37
37
 
38
38
  def shutdown!
39
39
  Utility::Logger.info("Shutting down connector service with pool [#{pool.class}]...")
40
40
  running.make_false
41
+ job_cleanup_timer.shutdown
41
42
  scheduler.shutdown
42
43
  pool.shutdown
43
44
  pool.wait_for_termination(TERMINATION_TIMEOUT)
@@ -70,11 +71,25 @@ module App
70
71
  end
71
72
  end
72
73
 
74
+ def job_cleanup_timer
75
+ @job_cleanup_timer ||= Concurrent::TimerTask.new(:execution_interval => JOB_CLEANUP_INTERVAL, :run_now => true) do
76
+ connector_id = App::Config.native_mode ? nil : App::Config.connector_id
77
+ Core::JobCleanUp.execute(connector_id)
78
+ end
79
+ end
80
+
81
+ def start_job_cleanup_task!
82
+ job_cleanup_timer.execute
83
+ end
84
+
73
85
  def start_polling_jobs!
74
86
  scheduler.when_triggered do |connector_settings, task|
75
87
  case task
76
88
  when :sync
77
- # update connector sync_now flag
89
+ # TODO: #update_connector_sync_now should be moved to Core::ConnectorSettings,
90
+ # there should not be any business logic related code in Core::ElasticConnectorActions.
91
+ # #update_connector_sync_now should not update `last_synced` after https://github.com/elastic/enterprise-search-team/issues/3366 is resolved,
92
+ # schedule should not be based on `last_synced`
78
93
  Core::ElasticConnectorActions.update_connector_sync_now(connector_settings.id, false)
79
94
 
80
95
  Core::Jobs::Producer.enqueue_job(job_type: :sync, connector_settings: connector_settings)
data/lib/connectors/base/connector.rb CHANGED
@@ -9,9 +9,15 @@
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'app/config'
11
11
  require 'bson'
12
- require 'connectors/base/advanced_snippet_validator'
13
12
  require 'core/ingestion'
14
13
  require 'connectors/tolerable_error_helper'
14
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
15
+ require 'core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator'
16
+ require 'core/filtering/simple_rules/validation/single_rule_against_schema_validator'
17
+ require 'core/filtering/transform/filter_transformer_facade'
18
+ require 'core/filtering/transform/transformation_target'
19
+ require 'core/filtering/filter_validator'
20
+ require 'core/filtering/processing_stage'
15
21
  require 'core/filtering/validation_status'
16
22
  require 'utility'
17
23
  require 'utility/filtering'
@@ -38,40 +44,55 @@ module Connectors
38
44
 
39
45
  def self.kibana_features
40
46
  [
41
- Utility::Constants::FILTERING_RULES_FEATURE,
42
- Utility::Constants::FILTERING_ADVANCED_FEATURE
47
+ { :feature => :sync_rules, :subfeature => :basic, :enabled => true },
48
+ { :feature => :sync_rules, :subfeature => :advanced, :enabled => true }
43
49
  ]
44
50
  end
45
51
 
46
- def self.advanced_snippet_validator
47
- AdvancedSnippetValidator
52
+ def self.advanced_snippet_validators
53
+ Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
48
54
  end
49
55
 
50
- def self.validate_filtering(filtering = {})
51
- # nothing to validate
52
- return { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] } unless filtering.present?
56
+ def self.simple_rules_validators
57
+ {
58
+ Core::Filtering::ProcessingStage::ALL => [
59
+ Core::Filtering::SimpleRules::Validation::SingleRuleAgainstSchemaValidator,
60
+ Core::Filtering::SimpleRules::Validation::NoConflictingPoliciesRulesValidator
61
+ ]
62
+ }
63
+ end
53
64
 
54
- filter = Utility::Filtering.extract_filter(filtering)
55
- advanced_snippet = filter.dig(:advanced_snippet, :value)
65
+ def self.filter_transformers
66
+ {
67
+ Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [],
68
+ Core::Filtering::Transform::TransformationTarget::RULES => []
69
+ }
70
+ end
56
71
 
57
- snippet_validator_instance = advanced_snippet_validator.new(advanced_snippet)
72
+ def self.validate_filtering(filtering = {})
73
+ filter = Utility::Filtering.extract_filter(filtering)
58
74
 
59
- snippet_validator_instance.is_snippet_valid?
75
+ filter_validator = Core::Filtering::FilterValidator.new(snippet_validator_classes: advanced_snippet_validators,
76
+ rules_validator_classes: simple_rules_validators,
77
+ rules_pre_processing_active: Utility::Filtering.rule_pre_processing_active?(filter))
78
+ filter_validator.is_filter_valid(filter)
60
79
  end
61
80
 
62
81
  attr_reader :rules, :advanced_filter_config
63
82
 
64
- def initialize(configuration: {}, job_description: {})
83
+ def initialize(configuration: {}, job_description: nil)
65
84
  error_monitor = Utility::ErrorMonitor.new
66
85
  @tolerable_error_helper = Connectors::TolerableErrorHelper.new(error_monitor)
67
86
 
68
- @configuration = configuration.dup || {}
69
- @job_description = job_description&.dup || {}
87
+ @configuration = job_description&.configuration&.dup || configuration&.dup || {}
88
+ @job_description = job_description&.dup
70
89
 
71
- filtering = Utility::Filtering.extract_filter(@job_description.dig(:connector, :filtering))
90
+ filter = Utility::Filtering.extract_filter(@job_description&.filtering)
91
+ filter = Core::Filtering::Transform::FilterTransformerFacade.new(filter, self.class.filter_transformers).transform
72
92
 
73
- @rules = filtering[:rules] || []
74
- @advanced_filter_config = filtering.dig(:advanced_snippet, :value) || {}
93
+ @rules = filter[:rules] || []
94
+ # regression bug, we need to keep indifferent access here until we get rid of symbols in the connectors
95
+ @advanced_filter_config = filter[:advanced_snippet]&.with_indifferent_access || {}
75
96
  end
76
97
 
77
98
  def yield_documents; end
@@ -100,10 +121,6 @@ module Connectors
100
121
  false
101
122
  end
102
123
 
103
- def filtering_present?
104
- @advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
105
- end
106
-
107
124
  def metadata
108
125
  {}
109
126
  end
data/lib/connectors/crawler/scheduler.rb CHANGED
@@ -22,11 +22,47 @@ module Connectors
22
22
  []
23
23
  end
24
24
 
25
+ def when_triggered
26
+ loop do
27
+ connector_settings.each do |cs|
28
+ # crawler only supports :sync
29
+ if sync_triggered?(cs)
30
+ yield cs, :sync, nil
31
+ next
32
+ end
33
+
34
+ schedule_key = custom_schedule_triggered(cs)
35
+ yield cs, :sync, schedule_key if schedule_key
36
+ end
37
+ rescue *Utility::AUTHORIZATION_ERRORS => e
38
+ log_authorization_error(e)
39
+ rescue StandardError => e
40
+ log_standard_error(e)
41
+ ensure
42
+ if @is_shutting_down
43
+ break
44
+ end
45
+ sleep_for_poll_interval
46
+ end
47
+ end
48
+
25
49
  private
26
50
 
27
51
  def connector_registered?(service_type)
28
52
  service_type == 'elastic-crawler'
29
53
  end
54
+
55
+ # custom scheduling has no ordering, so the first-found schedule is returned
56
+ def custom_schedule_triggered(cs)
57
+ cs.custom_scheduling_settings.each do |key, custom_scheduling|
58
+ identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59
+ if schedule_triggered?(custom_scheduling, identifier)
60
+ return key
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
30
66
  end
31
67
  end
32
68
  end
data/lib/connectors/example/connector.rb CHANGED
@@ -36,7 +36,7 @@ module Connectors
36
36
  }
37
37
  end
38
38
 
39
- def initialize(configuration: {}, job_description: {})
39
+ def initialize(configuration: {}, job_description: nil)
40
40
  super
41
41
  end
42
42
 
@@ -47,7 +47,7 @@ module Connectors
47
47
  # raise 'something went wrong'
48
48
  end
49
49
 
50
- def self.advanced_snippet_validator
50
+ def self.advanced_snippet_validators
51
51
  ExampleAdvancedSnippetValidator
52
52
  end
53
53
 
data/lib/connectors/example/example_advanced_snippet_validator.rb CHANGED
@@ -6,13 +6,14 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
10
+ require 'core/filtering/validation_status'
10
11
 
11
12
  module Connectors
12
13
  module Example
13
- class ExampleAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
+ class ExampleAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
14
15
 
15
- def is_snippet_valid?
16
+ def is_snippet_valid
16
17
  # TODO: real filtering validation will follow later
17
18
  errors = [
18
19
  {
data/lib/connectors/gitlab/connector.rb CHANGED
@@ -37,16 +37,16 @@ module Connectors
37
37
  }
38
38
  end
39
39
 
40
- def self.advanced_snippet_validator
40
+ def self.advanced_snippet_validators
41
41
  GitLabAdvancedSnippetValidator
42
42
  end
43
43
 
44
- def initialize(configuration: {}, job_description: {})
44
+ def initialize(configuration: {}, job_description: nil)
45
45
  super
46
46
 
47
47
  @extractor = Connectors::GitLab::Extractor.new(
48
- :base_url => configuration.dig(:base_url, :value),
49
- :api_token => configuration.dig(:api_token, :value)
48
+ :base_url => @configuration.dig(:base_url, :value),
49
+ :api_token => @configuration.dig(:api_token, :value)
50
50
  )
51
51
  end
52
52
 
data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb CHANGED
@@ -6,13 +6,13 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_validator'
10
10
 
11
11
  module Connectors
12
12
  module GitLab
13
- class GitLabAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
13
+ class GitLabAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
14
14
 
15
- def is_snippet_valid?
15
+ def is_snippet_valid
16
16
  # TODO: real filtering validation will follow later
17
17
  errors = [
18
18
  {
@@ -21,13 +21,11 @@ module Connectors
21
21
  }
22
22
  ]
23
23
 
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
24
+ if @advanced_snippet.present? && !@advanced_snippet.empty?
25
+ { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
+ else
27
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
+ end
31
29
  end
32
30
 
33
31
  end
@@ -4,10 +4,11 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
- module ConnectorsApp
8
- module Errors
9
- INVALID_API_KEY = 'INVALID_API_KEY'
10
- UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
11
- INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
7
+ # frozen_string_literal: true
8
+
9
+ module Connectors
10
+ class JobTriggerMethod
11
+ ON_DEMAND = 'on_demand'
12
+ SCHEDULED = 'scheduled'
12
13
  end
13
14
  end
data/lib/connectors/mongodb/connector.rb CHANGED
@@ -8,7 +8,8 @@
8
8
 
9
9
  require 'connectors/base/connector'
10
10
  require 'core/filtering/validation_status'
11
- require 'connectors/mongodb/mongo_rules_parser'
11
+ require 'core/filtering/transform/transformation_target'
12
+ require 'connectors/mongodb/mongo_advanced_snippet_snake_case_transformer'
12
13
  require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
13
14
  require 'mongo'
14
15
  require 'utility'
@@ -19,6 +20,9 @@ module Connectors
19
20
 
20
21
  ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
21
22
 
23
+ AGGREGATE = 'aggregate'
24
+ FIND = 'find'
25
+
22
26
  PAGE_SIZE = 100
23
27
 
24
28
  def self.service_type
@@ -52,19 +56,25 @@ module Connectors
52
56
  }
53
57
  end
54
58
 
55
- def self.advanced_snippet_validator
59
+ def self.advanced_snippet_validators
56
60
  MongoAdvancedSnippetAgainstSchemaValidator
57
61
  end
58
62
 
59
- def initialize(configuration: {}, job_description: {})
63
+ def self.filter_transformers
64
+ {
65
+ Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [MongoAdvancedSnippetSnakeCaseTransformer]
66
+ }
67
+ end
68
+
69
+ def initialize(configuration: {}, job_description: nil)
60
70
  super
61
71
 
62
- @host = configuration.dig(:host, :value)
63
- @database = configuration.dig(:database, :value)
64
- @collection = configuration.dig(:collection, :value)
65
- @user = configuration.dig(:user, :value)
66
- @password = configuration.dig(:password, :value)
67
- @direct_connection = configuration.dig(:direct_connection, :value)
72
+ @host = @configuration.dig(:host, :value)
73
+ @database = @configuration.dig(:database, :value)
74
+ @collection = @configuration.dig(:collection, :value)
75
+ @user = @configuration.dig(:user, :value)
76
+ @password = @configuration.dig(:password, :value)
77
+ @direct_connection = @configuration.dig(:direct_connection, :value)
68
78
  end
69
79
 
70
80
  def yield_documents
@@ -73,44 +83,56 @@ module Connectors
73
83
  # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
74
84
  # It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
75
85
  # during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
76
- cursor, options = create_db_cursor_on_collection(client[@collection])
77
- skip = 0
78
-
79
- found_overall = 0
80
-
81
- # if no overall limit is specified by filtering use -1 to not break ingestion, when no overall limit is specified (found_overall is only increased,
82
- # thus can never reach -1)
83
- overall_limit = Float::INFINITY
86
+ cursor_type, cursor_with_options = create_db_cursor_on_collection(client[@collection])
87
+ cursor, options = cursor_with_options
88
+
89
+ case cursor_type
90
+ when FIND
91
+ skip = 0
92
+ found_overall = 0
93
+
94
+ # if no overall limit is specified by filtering, use Float::INFINITY so ingestion is never cut off (found_overall is only increased,
95
+ # thus can never reach Float::INFINITY)
96
+ overall_limit = Float::INFINITY
97
+
98
+ if options.present?
99
+ # there could be a skip parameter defined for filtering
100
+ skip = options.fetch(:skip, skip)
101
+ # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
102
+ overall_limit = options.fetch(:limit, overall_limit)
103
+ end
84
104
 
85
- if options.present?
86
- # there could be a skip parameter defined for filtering
87
- skip = options.fetch(:skip, skip)
88
- # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
89
- overall_limit = options.fetch(:limit, overall_limit)
90
- end
105
+ overall_limit_reached = false
106
+
107
+ loop do
108
+ found_in_page = 0
109
+
110
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
111
+ view = cursor.skip(skip).limit(PAGE_SIZE)
112
+ view.each do |document|
113
+ yield_with_handling_tolerable_errors do
114
+ yield serialize(document)
115
+ found_in_page += 1
116
+ found_overall += 1
117
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
118
+ end
119
+ break if overall_limit_reached
120
+ end
91
121
 
92
- overall_limit_reached = false
122
+ page_was_empty = found_in_page == 0
93
123
 
94
- loop do
95
- found_in_page = 0
124
+ break if page_was_empty || overall_limit_reached
96
125
 
97
- Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
98
- view = cursor.skip(skip).limit(PAGE_SIZE)
99
- view.each do |document|
126
+ skip += PAGE_SIZE
127
+ end
128
+ when AGGREGATE
129
+ cursor.each do |document|
100
130
  yield_with_handling_tolerable_errors do
101
131
  yield serialize(document)
102
- found_in_page += 1
103
- found_overall += 1
104
- overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
105
132
  end
106
- break if overall_limit_reached
107
133
  end
108
-
109
- page_was_empty = found_in_page == 0
110
-
111
- break if page_was_empty || overall_limit_reached
112
-
113
- skip += PAGE_SIZE
134
+ else
135
+ raise "Unknown retrieval function #{cursor_type} for MongoDB."
114
136
  end
115
137
  end
116
138
  end
@@ -118,22 +140,20 @@ module Connectors
118
140
  private
119
141
 
120
142
  def create_db_cursor_on_collection(collection)
121
- return create_find_cursor(collection) if @advanced_filter_config[:find].present?
143
+ return [AGGREGATE, create_aggregate_cursor(collection)] if @advanced_filter_config[:aggregate].present?
122
144
 
123
- return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
145
+ return [FIND, create_find_cursor(collection)] if @advanced_filter_config[:find].present?
124
146
 
125
- return create_simple_rules_cursor(collection) if @rules.present?
126
-
127
- collection.find
147
+ [FIND, collection.find]
128
148
  end
129
149
 
130
150
  def create_aggregate_cursor(collection)
131
151
  aggregate = @advanced_filter_config[:aggregate]
132
152
 
133
- pipeline = aggregate[:pipeline]
153
+ pipeline = aggregate[:pipeline] || []
134
154
  options = extract_options(aggregate)
135
155
 
136
- if !pipeline.nil? && pipeline.empty? && !options.present?
156
+ if pipeline.empty? && options.empty?
137
157
  Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
138
158
  end
139
159
 
@@ -153,16 +173,6 @@ module Connectors
153
173
  [collection.find(filter, options), options]
154
174
  end
155
175
 
156
- def create_simple_rules_cursor(collection)
157
- filter = {}
158
- if @rules.present?
159
- parser = MongoRulesParser.new(@rules)
160
- filter = parser.parse
161
- end
162
- Utility::Logger.info("Filtering with simple rules filter: #{filter}")
163
- filter.present? ? collection.find(filter) : collection.find
164
- end
165
-
166
176
  def extract_options(mongodb_function)
167
177
  mongodb_function[:options].present? ? mongodb_function[:options] : {}
168
178
  end
data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb CHANGED
@@ -6,12 +6,12 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors/base/advanced_snippet_against_schema_validator'
9
+ require 'core/filtering/advanced_snippet/advanced_snippet_against_schema_validator'
10
10
  require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
11
 
12
12
  module Connectors
13
13
  module MongoDB
14
- class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
14
+ class MongoAdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetAgainstSchemaValidator
15
15
 
16
16
  def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
17
17
  super
data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb CHANGED
@@ -25,7 +25,7 @@ module Connectors
25
25
  NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
26
26
  READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
27
27
  STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
28
- MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
28
+ MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.nil? || fields.size <= 1 }
29
29
 
30
30
  AGGREGATION_PIPELINE = lambda { |pipeline|
31
31
  return false unless pipeline.is_a?(Array)
@@ -265,7 +265,8 @@ module Connectors
265
265
 
266
266
  FIND_FILTER = {
267
267
  :name => 'filter',
268
- :type => FILTER
268
+ :type => FILTER,
269
+ :optional => true
269
270
  }
270
271
 
271
272
  FIND = {