connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/… → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b550a78dba7e4cd4502b6eea4c187253e6bb77ab944815b59809d92c7ccc23bb
4
- data.tar.gz: dbe8c32e1da94ed40777af57a84ecac883a15cab71aa164534157314bbcbfcff
3
+ metadata.gz: 8f69f05260d34b07ce34d569ce7c41fdd10349b33121823697ebbb6a4ebf9206
4
+ data.tar.gz: a2957118c80d0e2bc9ea6a8046307485c11e4f809efb01cabbb96e341dc947c2
5
5
  SHA512:
6
- metadata.gz: 92ef83e3bc94107b1cb11b4454760b17c217b61beb555b55d01ddb7b0758372c3f04ed1acc153e406f20c7abb2df5a3540c2b93733b48eafcc99bd752e7d6759
7
- data.tar.gz: a7eade1996d683fcf47e072704d566479538249d9711e8c0d2019fbb7bd15c382c61cd35adc6ee52f989a75c1fa6d7a6fe1330a04801fb46952aa307f2e93ac5
6
+ metadata.gz: 63775eded9d9953b41950edd7ca86176200c4ae7510564f6f7995c336d6e78bbe40d494cb0a152a984b14b45055e7b7847a2779888d70905d4956b8c78d4bda1
7
+ data.tar.gz: 52b00d122ef43fc5afa0b4cb50bbe428111e7fe2cb7cee437dd2d2b6b32516cbd01c5803b0a11609a5021e85605a2e6bd2b30973807df3cae1420864e2fcb185
@@ -1,11 +1,10 @@
1
1
  # general metadata
2
- version: 8.6.0.3
3
- repository: https://github.com/elastic/connectors-ruby.git
4
- revision: aa2faf8cc993a26980441adffe97d62fdaf5aa5c
2
+ version: 8.6.0.4-20221114T233727Z
3
+ repository: git@github.com:elastic/ent-search-connectors.git
4
+ revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
5
5
  elasticsearch:
6
- cloud_id: CHANGEME
7
6
  hosts: http://localhost:9200
8
- api_key: CHANGEME
7
+ api_key: WXNYeWQ0UUJ4Y3ZQV3ctbjVibnU6REx4eE8tbFhUMU94N2JoU2hIeVFMQQ==
9
8
  retry_on_failure: 3
10
9
  request_timeout: 120
11
10
  disable_warnings: true
@@ -15,11 +14,11 @@ thread_pool:
15
14
  min_threads: 0
16
15
  max_threads: 5
17
16
  max_queue: 100
18
- log_level: info
19
- ecs_logging: true
17
+ log_level: debug
18
+ ecs_logging: false
20
19
  poll_interval: 3
21
20
  termination_timeout: 60
22
21
  heartbeat_interval: 1800
23
- native_mode: true
24
- connector_id: CHANGEME
25
- service_type: CHANGEME
22
+ native_mode: false
23
+ connector_id: YcXyd4QBxcvPWw-n2bkA
24
+ service_type: mongodb
data/lib/app/config.rb CHANGED
@@ -35,6 +35,8 @@ puts "Parsing #{CONFIG_FILE} configuration file."
35
35
  optional(:disable_warnings).value(:bool?)
36
36
  optional(:trace).value(:bool?)
37
37
  optional(:log).value(:bool?)
38
+ optional(:ca_fingerprint).value(:string)
39
+ optional(:transport_options).value(:hash)
38
40
  end
39
41
 
40
42
  optional(:thread_pool).hash do
@@ -73,6 +73,8 @@ module App
73
73
  start_heartbeat_task(connector_settings)
74
74
  when :configuration
75
75
  start_configuration_task(connector_settings)
76
+ when :filter_validation
77
+ start_filter_validation_task(connector_settings)
76
78
  else
77
79
  Utility::Logger.error("Unknown task type: #{task}. Skipping...")
78
80
  end
@@ -84,10 +86,14 @@ module App
84
86
  def start_sync_task(connector_settings)
85
87
  start_heartbeat_task(connector_settings)
86
88
  pool.post do
87
- Utility::Logger.info("Starting a sync job for #{connector_settings.formatted}...")
89
+ Utility::Logger.info("Initiating a sync job for #{connector_settings.formatted}...")
88
90
  Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
89
91
  job_runner = Core::SyncJobRunner.new(connector_settings)
90
92
  job_runner.execute
93
+ rescue Core::JobAlreadyRunningError
94
+ Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
95
+ rescue Core::ConnectorVersionChangedError => e
96
+ Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
91
97
  rescue StandardError => e
92
98
  Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
93
99
  end
@@ -116,6 +122,16 @@ module App
116
122
  Utility::ExceptionTracking.log_exception(e, "Configuration task for #{connector_settings.formatted} failed due to unexpected error.")
117
123
  end
118
124
  end
125
+
126
+ def start_filter_validation_task(connector_settings)
127
+ pool.post do
128
+ Utility::Logger.info("Validating filters for #{connector_settings.formatted}...")
129
+ validation_job_runner = Core::Filtering::ValidationJobRunner.new(connector_settings)
130
+ validation_job_runner.execute
131
+ rescue StandardError => e
132
+ Utility::ExceptionTracking.log_exception(e, "Filter validation task for #{connector_settings.formatted} failed due to unexpected error.")
133
+ end
134
+ end
119
135
  end
120
136
  end
121
137
  end
@@ -23,6 +23,7 @@ module App
23
23
  check_es_connection!
24
24
  check_es_version!
25
25
  check_system_indices!
26
+ check_single_connector!
26
27
  end
27
28
 
28
29
  private
@@ -59,6 +60,16 @@ module App
59
60
  )
60
61
  end
61
62
 
63
+ #-------------------------------------------------------------------------------------------------
64
+ # Ensures the connector is supported when running in non-native mode
65
+ def check_single_connector!
66
+ if App::Config.native_mode
67
+ Utility::Logger.info('Skip single connector check for native mode.')
68
+ elsif !Connectors::REGISTRY.registered?(App::Config.service_type)
69
+ fail_check!("The service type #{App::Config.service_type} is not supported. Terminating...")
70
+ end
71
+ end
72
+
62
73
  def check_es_connection_with_retries!(retry_interval:, retry_timeout:)
63
74
  started_at = Time.now
64
75
 
@@ -75,6 +86,10 @@ module App
75
86
  else
76
87
  raise UnhealthyCluster, "Unexpected cluster status: #{response['status']}"
77
88
  end
89
+ rescue *Utility::AUTHORIZATION_ERRORS => e
90
+ Utility::ExceptionTracking.log_exception(e)
91
+
92
+ fail_check!("Elasticsearch returned 'Unauthorized' response. Check your authentication details. Terminating...")
78
93
  rescue *App::RETRYABLE_CONNECTION_ERRORS => e
79
94
  Utility::Logger.warn('Could not connect to Elasticsearch. Make sure it is running and healthy.')
80
95
  Utility::Logger.debug("Error: #{e.full_message}")
@@ -7,10 +7,11 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'bson'
10
- require 'core/output_sink'
11
- require 'utility/exception_tracking'
12
- require 'utility/errors'
10
+ require 'core/ingestion'
11
+ require 'utility'
12
+ require 'utility/filtering'
13
13
  require 'app/config'
14
+ require 'active_support/core_ext/hash/indifferent_access'
14
15
 
15
16
  module Connectors
16
17
  module Base
@@ -19,6 +20,11 @@ module Connectors
19
20
  raise 'Not implemented for this connector'
20
21
  end
21
22
 
23
+ # Used as a framework util method, don't override
24
+ def self.configurable_fields_indifferent_access
25
+ configurable_fields.with_indifferent_access
26
+ end
27
+
22
28
  def self.configurable_fields
23
29
  {}
24
30
  end
@@ -27,8 +33,27 @@ module Connectors
27
33
  raise 'Not implemented for this connector'
28
34
  end
29
35
 
30
- def initialize(configuration: {})
36
+ def self.kibana_features
37
+ [
38
+ Utility::Constants::FILTERING_RULES_FEATURE,
39
+ Utility::Constants::FILTERING_ADVANCED_FEATURE
40
+ ]
41
+ end
42
+
43
+ def self.validate_filtering(_filtering = {})
44
+ raise 'Not implemented for this connector'
45
+ end
46
+
47
+ attr_reader :rules, :advanced_filter_config
48
+
49
+ def initialize(configuration: {}, job_description: {})
31
50
  @configuration = configuration.dup || {}
51
+ @job_description = job_description&.dup || {}
52
+
53
+ filtering = Utility::Filtering.extract_filter(@job_description.dig(:connector, :filtering))
54
+
55
+ @rules = filtering[:rules] || []
56
+ @advanced_filter_config = filtering[:advanced_snippet] || {}
32
57
  end
33
58
 
34
59
  def yield_documents; end
@@ -52,6 +77,14 @@ module Connectors
52
77
  Utility::ExceptionTracking.log_exception(e, "Connector for service #{self.class.service_type} failed the health check for 3rd-party service.")
53
78
  false
54
79
  end
80
+
81
+ def filtering_present?
82
+ @advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
83
+ end
84
+
85
+ def metadata
86
+ {}
87
+ end
55
88
  end
56
89
  end
57
90
  end
@@ -0,0 +1,42 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+ # frozen_string_literal: true
7
+
8
+ require 'active_support/core_ext/hash/indifferent_access'
9
+ require 'active_support/core_ext/object/blank'
10
+ require 'core/filtering/simple_rule'
11
+
12
+ module Connectors
13
+ module Base
14
+ class SimpleRulesParser
15
+ def initialize(rules)
16
+ @rules = (rules || []).map(&:with_indifferent_access).filter { |r| r[:id] != 'DEFAULT' }.sort_by { |r| r[:order] }
17
+ end
18
+
19
+ def parse
20
+ merge_rules(@rules.map do |rule_hash|
21
+ rule = Core::Filtering::SimpleRule.new(rule_hash)
22
+ unless rule.is_include? || rule.is_exclude?
23
+ raise "Unknown policy: #{rule.policy}"
24
+ end
25
+ parse_rule(rule)
26
+ end)
27
+ end
28
+
29
+ private
30
+
31
+ # merge all rules into a filter object or array
32
+ # in a base case, does no transformations
33
+ def merge_rules(rules)
34
+ rules || []
35
+ end
36
+
37
+ def parse_rule(_rule)
38
+ raise 'Not implemented'
39
+ end
40
+ end
41
+ end
42
+ end
@@ -8,11 +8,11 @@
8
8
 
9
9
  module Connectors
10
10
  class ConnectorStatus
11
- CREATED = 'created'
11
+ CREATED = 'created'
12
12
  NEEDS_CONFIGURATION = 'needs_configuration'
13
- CONFIGURED = 'configured'
14
- CONNECTED = 'connected'
15
- ERROR = 'error'
13
+ CONFIGURED = 'configured'
14
+ CONNECTED = 'connected'
15
+ ERROR = 'error'
16
16
 
17
17
  STATUSES = [
18
18
  CREATED,
@@ -7,6 +7,7 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
10
11
  require 'utility'
11
12
 
12
13
  module Connectors
@@ -20,16 +21,21 @@ module Connectors
20
21
  'Example Connector'
21
22
  end
22
23
 
24
+ # Field 'Foo' won't have a default value. Field 'Bar' will have the default value 'Value'.
23
25
  def self.configurable_fields
24
26
  {
25
27
  'foo' => {
26
28
  'label' => 'Foo',
27
29
  'value' => nil
30
+ },
31
+ :bar => {
32
+ :label => 'Bar',
33
+ :value => 'Value'
28
34
  }
29
35
  }
30
36
  end
31
37
 
32
- def initialize(configuration: {})
38
+ def initialize(configuration: {}, job_description: {})
33
39
  super
34
40
  end
35
41
 
@@ -40,18 +46,51 @@ module Connectors
40
46
  # raise 'something went wrong'
41
47
  end
42
48
 
49
+ def self.validate_filtering(filtering = {})
50
+ # TODO: real filtering validation will follow later
51
+ errors = [
52
+ {
53
+ :ids => ['missing-implementation'],
54
+ :messages => ['Filtering is not implemented yet for the example connector']
55
+ }
56
+ ]
57
+
58
+ return { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors } if filtering.present?
59
+
60
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
61
+ end
62
+
43
63
  def yield_documents
44
64
  attachments = [
45
- File.open('./lib/connectors/example/example_attachments/first_attachment.txt'),
46
- File.open('./lib/connectors/example/example_attachments/second_attachment.txt'),
47
- File.open('./lib/connectors/example/example_attachments/third_attachment.txt')
65
+ load_attachment('first_attachment.txt'),
66
+ load_attachment('second_attachment.txt'),
67
+ load_attachment('third_attachment.txt'),
48
68
  ]
49
69
 
50
70
  attachments.each_with_index do |att, index|
51
71
  data = { id: (index + 1).to_s, name: "example document #{index + 1}", _attachment: File.read(att) }
72
+
73
+ # Uncomment one of these two lines to simulate longer running sync jobs
74
+ #
75
+ # sleep(rand(10..60).seconds)
76
+ # sleep(rand(1..10).minutes)
77
+
52
78
  yield data
53
79
  end
54
80
  end
81
+
82
+ private
83
+
84
+ def load_attachment(path)
85
+ attachment_dir = "#{File.dirname(__FILE__)}/attachments"
86
+ attachment_path = "#{attachment_dir}/#{path}"
87
+
88
+ unless File.exist?(attachment_path)
89
+ raise "Attachment at location '#{attachment_path}' doesn't exist. Attachments should be located under #{attachment_dir}"
90
+ end
91
+
92
+ File.open(attachment_path)
93
+ end
55
94
  end
56
95
  end
57
96
  end
@@ -11,7 +11,7 @@ require 'connectors/base/connector'
11
11
  require 'connectors/gitlab/extractor'
12
12
  require 'connectors/gitlab/custom_client'
13
13
  require 'connectors/gitlab/adapter'
14
- require 'core/output_sink'
14
+ require 'core/ingestion'
15
15
 
16
16
  module Connectors
17
17
  module GitLab
@@ -36,7 +36,21 @@ module Connectors
36
36
  }
37
37
  end
38
38
 
39
- def initialize(configuration: {})
39
+ def self.validate_filtering(filtering = {})
40
+ # TODO: real filtering validation will follow later
41
+ errors = [
42
+ {
43
+ :ids => ['missing-implementation'],
44
+ :messages => ['Filtering is not implemented yet for the GitLab connector']
45
+ }
46
+ ]
47
+
48
+ return { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors } if filtering.present?
49
+
50
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
51
+ end
52
+
53
+ def initialize(configuration: {}, job_description: {})
40
54
  super
41
55
 
42
56
  @extractor = Connectors::GitLab::Extractor.new(
@@ -6,13 +6,20 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'active_support/core_ext/hash/indifferent_access'
10
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
11
+ require 'connectors/mongodb/mongo_rules_parser'
11
12
  require 'mongo'
13
+ require 'utility'
12
14
 
13
15
  module Connectors
14
16
  module MongoDB
15
17
  class Connector < Connectors::Base::Connector
18
+
19
+ ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
20
+
21
+ PAGE_SIZE = 100
22
+
16
23
  def self.service_type
17
24
  'mongodb'
18
25
  end
@@ -23,28 +30,47 @@ module Connectors
23
30
 
24
31
  def self.configurable_fields
25
32
  {
26
- :host => {
27
- :label => 'Server Hostname'
28
- },
29
- :user => {
30
- :label => 'Username'
31
- },
32
- :password => {
33
- :label => 'Password'
34
- },
35
- :database => {
36
- :label => 'Database'
37
- },
38
- :collection => {
39
- :label => 'Collection'
40
- },
41
- :direct_connection => {
42
- :label => 'Direct connection? (true/false)'
43
- }
33
+ :host => {
34
+ :label => 'Server Hostname'
35
+ },
36
+ :user => {
37
+ :label => 'Username'
38
+ },
39
+ :password => {
40
+ :label => 'Password'
41
+ },
42
+ :database => {
43
+ :label => 'Database'
44
+ },
45
+ :collection => {
46
+ :label => 'Collection'
47
+ },
48
+ :direct_connection => {
49
+ :label => 'Direct connection? (true/false)'
50
+ }
44
51
  }
45
52
  end
46
53
 
47
- def initialize(configuration: {})
54
+ def self.validate_filtering(filtering = {})
55
+ valid_filtering = { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
56
+
57
+ return valid_filtering unless filtering.present?
58
+
59
+ filter = Utility::Filtering.extract_filter(filtering)
60
+
61
+ advanced_filter_config = filter[:advanced_snippet] || {}
62
+ filter_keys = advanced_filter_config&.keys
63
+
64
+ if !filter_keys&.empty? && (filter_keys.size != 1 || !ALLOWED_TOP_LEVEL_FILTER_KEYS.include?(filter_keys[0]&.to_s))
65
+ return { :state => Core::Filtering::ValidationStatus::INVALID,
66
+ :errors => [{ :ids => ['wrong-keys'],
67
+ :messages => ["Only one of #{ALLOWED_TOP_LEVEL_FILTER_KEYS} is allowed in the filtering object. Keys present: '#{filter_keys}'."] }] }
68
+ end
69
+
70
+ valid_filtering
71
+ end
72
+
73
+ def initialize(configuration: {}, job_description: {})
48
74
  super
49
75
 
50
76
  @host = configuration.dig(:host, :value)
@@ -57,16 +83,105 @@ module Connectors
57
83
 
58
84
  def yield_documents
59
85
  with_client do |client|
60
- client[@collection].find.each do |document|
61
- doc = document.with_indifferent_access
86
+ # We do paging using skip().limit() here to make Ruby recycle the memory for each page pulled from the server after it's not needed any more.
87
+ # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
88
+ # It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
89
+ # during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
90
+ cursor, options = create_db_cursor_on_collection(client[@collection])
91
+ skip = 0
92
+
93
+ found_overall = 0
94
+
95
+ # if no overall limit is specified by filtering use -1 to not break ingestion, when no overall limit is specified (found_overall is only increased,
96
+ # thus can never reach -1)
97
+ overall_limit = Float::INFINITY
98
+
99
+ if options.present?
100
+ # there could be a skip parameter defined for filtering
101
+ skip = options.fetch(:skip, skip)
102
+ # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
103
+ overall_limit = options.fetch(:limit, overall_limit)
104
+ end
62
105
 
63
- yield serialize(doc)
106
+ overall_limit_reached = false
107
+
108
+ loop do
109
+ found_in_page = 0
110
+
111
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
112
+ view = cursor.skip(skip).limit(PAGE_SIZE)
113
+ view.each do |document|
114
+ yield serialize(document)
115
+
116
+ found_in_page += 1
117
+ found_overall += 1
118
+
119
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
120
+
121
+ break if overall_limit_reached
122
+ end
123
+
124
+ page_was_empty = found_in_page == 0
125
+
126
+ break if page_was_empty || overall_limit_reached
127
+
128
+ skip += PAGE_SIZE
64
129
  end
65
130
  end
66
131
  end
67
132
 
68
133
  private
69
134
 
135
+ def create_db_cursor_on_collection(collection)
136
+ return create_find_cursor(collection) if @advanced_filter_config[:find].present?
137
+
138
+ return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
139
+
140
+ return create_simple_rules_cursor(collection) if @rules.present?
141
+
142
+ collection.find
143
+ end
144
+
145
+ def create_aggregate_cursor(collection)
146
+ aggregate = @advanced_filter_config[:aggregate]
147
+
148
+ pipeline = aggregate[:pipeline]
149
+ options = extract_options(aggregate)
150
+
151
+ if !pipeline.nil? && pipeline.empty? && !options.present?
152
+ Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
153
+ end
154
+
155
+ [collection.aggregate(pipeline, options), options]
156
+ end
157
+
158
+ def create_find_cursor(collection)
159
+ find = @advanced_filter_config[:find]
160
+
161
+ filter = find[:filter]
162
+ options = extract_options(find)
163
+
164
+ if !filter.nil? && filter.empty? && !options.present?
165
+ Utility::Logger.warn('\'Find\' was specified with an empty filter and empty options.')
166
+ end
167
+
168
+ [collection.find(filter, options), options]
169
+ end
170
+
171
+ def create_simple_rules_cursor(collection)
172
+ filter = {}
173
+ if @rules.present?
174
+ parser = MongoRulesParser.new(@rules)
175
+ filter = parser.parse
176
+ end
177
+ Utility::Logger.info("Filtering with simple rules filter: #{filter}")
178
+ filter.present? ? collection.find(filter) : collection.find
179
+ end
180
+
181
+ def extract_options(mongodb_function)
182
+ mongodb_function[:options].present? ? mongodb_function[:options] : {}
183
+ end
184
+
70
185
  def do_health_check
71
186
  with_client do |_client|
72
187
  Utility::Logger.debug("Mongo at #{@host}/#{@database} looks healthy.")
@@ -76,34 +191,43 @@ module Connectors
76
191
  def with_client
77
192
  raise "Invalid value for 'Direct connection' : #{@direct_connection}." unless %w[true false].include?(@direct_connection.to_s.strip.downcase)
78
193
 
79
- client = if @user.present? || @password.present?
80
- Mongo::Client.new(
81
- @host,
82
- database: @database,
83
- direct_connection: to_boolean(@direct_connection),
84
- user: @user,
85
- password: @password
86
- )
87
- else
88
- Mongo::Client.new(
89
- @host,
90
- database: @database,
91
- direct_connection: to_boolean(@direct_connection)
92
- )
93
- end
94
-
95
- begin
96
- Utility::Logger.debug("Existing Databases #{client.database_names}")
97
- Utility::Logger.debug('Existing Collections:')
98
-
99
- client.collections.each { |coll| Utility::Logger.debug(coll.name) }
194
+ args = {
195
+ database: @database,
196
+ direct_connection: to_boolean(@direct_connection)
197
+ }
198
+
199
+ if @user.present? || @password.present?
200
+ args[:user] = @user
201
+ args[:password] = @password
202
+ end
203
+
204
+ Mongo::Client.new(@host, args) do |client|
205
+ databases = client.database_names
206
+
207
+ Utility::Logger.debug("Existing Databases: #{databases}")
208
+ check_database_exists!(databases, @database)
209
+
210
+ collections = client.database.collection_names
211
+
212
+ Utility::Logger.debug("Existing Collections: #{collections}")
213
+ check_collection_exists!(collections, @database, @collection)
100
214
 
101
215
  yield client
102
- ensure
103
- client.close
104
216
  end
105
217
  end
106
218
 
219
+ def check_database_exists!(databases, database)
220
+ return if databases.include?(database)
221
+
222
+ raise "Database (#{database}) does not exist. Existing databases: #{databases.join(', ')}"
223
+ end
224
+
225
+ def check_collection_exists!(collections, database, collection)
226
+ return if collections.include?(collection)
227
+
228
+ raise "Collection (#{collection}) does not exist within database '#{database}'. Existing collections: #{collections.join(', ')}"
229
+ end
230
+
107
231
  def serialize(mongodb_document)
108
232
  # This is some lazy serialization here.
109
233
  # Problem: MongoDB has its own format of things - e.g. ids are Bson::ObjectId, which when serialized to JSON
@@ -120,11 +244,10 @@ module Connectors
120
244
  mongodb_document.map { |v| serialize(v) }
121
245
  when Hash
122
246
  mongodb_document.map do |key, value|
123
- remapped_key = key.to_sym == :_id ? :id : key.to_sym
124
-
247
+ key = 'id' if key == '_id'
125
248
  remapped_value = serialize(value)
126
- [remapped_key, remapped_value]
127
- end.to_h.with_indifferent_access
249
+ [key, remapped_value]
250
+ end.to_h
128
251
  else
129
252
  mongodb_document
130
253
  end