connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b550a78dba7e4cd4502b6eea4c187253e6bb77ab944815b59809d92c7ccc23bb
4
- data.tar.gz: dbe8c32e1da94ed40777af57a84ecac883a15cab71aa164534157314bbcbfcff
3
+ metadata.gz: 8f69f05260d34b07ce34d569ce7c41fdd10349b33121823697ebbb6a4ebf9206
4
+ data.tar.gz: a2957118c80d0e2bc9ea6a8046307485c11e4f809efb01cabbb96e341dc947c2
5
5
  SHA512:
6
- metadata.gz: 92ef83e3bc94107b1cb11b4454760b17c217b61beb555b55d01ddb7b0758372c3f04ed1acc153e406f20c7abb2df5a3540c2b93733b48eafcc99bd752e7d6759
7
- data.tar.gz: a7eade1996d683fcf47e072704d566479538249d9711e8c0d2019fbb7bd15c382c61cd35adc6ee52f989a75c1fa6d7a6fe1330a04801fb46952aa307f2e93ac5
6
+ metadata.gz: 63775eded9d9953b41950edd7ca86176200c4ae7510564f6f7995c336d6e78bbe40d494cb0a152a984b14b45055e7b7847a2779888d70905d4956b8c78d4bda1
7
+ data.tar.gz: 52b00d122ef43fc5afa0b4cb50bbe428111e7fe2cb7cee437dd2d2b6b32516cbd01c5803b0a11609a5021e85605a2e6bd2b30973807df3cae1420864e2fcb185
@@ -1,11 +1,10 @@
1
1
  # general metadata
2
- version: 8.6.0.3
3
- repository: https://github.com/elastic/connectors-ruby.git
4
- revision: aa2faf8cc993a26980441adffe97d62fdaf5aa5c
2
+ version: 8.6.0.4-20221114T233727Z
3
+ repository: git@github.com:elastic/ent-search-connectors.git
4
+ revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
5
5
  elasticsearch:
6
- cloud_id: CHANGEME
7
6
  hosts: http://localhost:9200
8
- api_key: CHANGEME
7
+ api_key: WXNYeWQ0UUJ4Y3ZQV3ctbjVibnU6REx4eE8tbFhUMU94N2JoU2hIeVFMQQ==
9
8
  retry_on_failure: 3
10
9
  request_timeout: 120
11
10
  disable_warnings: true
@@ -15,11 +14,11 @@ thread_pool:
15
14
  min_threads: 0
16
15
  max_threads: 5
17
16
  max_queue: 100
18
- log_level: info
19
- ecs_logging: true
17
+ log_level: debug
18
+ ecs_logging: false
20
19
  poll_interval: 3
21
20
  termination_timeout: 60
22
21
  heartbeat_interval: 1800
23
- native_mode: true
24
- connector_id: CHANGEME
25
- service_type: CHANGEME
22
+ native_mode: false
23
+ connector_id: YcXyd4QBxcvPWw-n2bkA
24
+ service_type: mongodb
data/lib/app/config.rb CHANGED
@@ -35,6 +35,8 @@ puts "Parsing #{CONFIG_FILE} configuration file."
35
35
  optional(:disable_warnings).value(:bool?)
36
36
  optional(:trace).value(:bool?)
37
37
  optional(:log).value(:bool?)
38
+ optional(:ca_fingerprint).value(:string)
39
+ optional(:transport_options).value(:hash)
38
40
  end
39
41
 
40
42
  optional(:thread_pool).hash do
@@ -73,6 +73,8 @@ module App
73
73
  start_heartbeat_task(connector_settings)
74
74
  when :configuration
75
75
  start_configuration_task(connector_settings)
76
+ when :filter_validation
77
+ start_filter_validation_task(connector_settings)
76
78
  else
77
79
  Utility::Logger.error("Unknown task type: #{task}. Skipping...")
78
80
  end
@@ -84,10 +86,14 @@ module App
84
86
  def start_sync_task(connector_settings)
85
87
  start_heartbeat_task(connector_settings)
86
88
  pool.post do
87
- Utility::Logger.info("Starting a sync job for #{connector_settings.formatted}...")
89
+ Utility::Logger.info("Initiating a sync job for #{connector_settings.formatted}...")
88
90
  Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
89
91
  job_runner = Core::SyncJobRunner.new(connector_settings)
90
92
  job_runner.execute
93
+ rescue Core::JobAlreadyRunningError
94
+ Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
95
+ rescue Core::ConnectorVersionChangedError => e
96
+ Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
91
97
  rescue StandardError => e
92
98
  Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
93
99
  end
@@ -116,6 +122,16 @@ module App
116
122
  Utility::ExceptionTracking.log_exception(e, "Configuration task for #{connector_settings.formatted} failed due to unexpected error.")
117
123
  end
118
124
  end
125
+
126
+ def start_filter_validation_task(connector_settings)
127
+ pool.post do
128
+ Utility::Logger.info("Validating filters for #{connector_settings.formatted}...")
129
+ validation_job_runner = Core::Filtering::ValidationJobRunner.new(connector_settings)
130
+ validation_job_runner.execute
131
+ rescue StandardError => e
132
+ Utility::ExceptionTracking.log_exception(e, "Filter validation task for #{connector_settings.formatted} failed due to unexpected error.")
133
+ end
134
+ end
119
135
  end
120
136
  end
121
137
  end
@@ -23,6 +23,7 @@ module App
23
23
  check_es_connection!
24
24
  check_es_version!
25
25
  check_system_indices!
26
+ check_single_connector!
26
27
  end
27
28
 
28
29
  private
@@ -59,6 +60,16 @@ module App
59
60
  )
60
61
  end
61
62
 
63
+ #-------------------------------------------------------------------------------------------------
64
+ # Ensures the connector is supported when running in non-native mode
65
+ def check_single_connector!
66
+ if App::Config.native_mode
67
+ Utility::Logger.info('Skip single connector check for native mode.')
68
+ elsif !Connectors::REGISTRY.registered?(App::Config.service_type)
69
+ fail_check!("The service type #{App::Config.service_type} is not supported. Terminating...")
70
+ end
71
+ end
72
+
62
73
  def check_es_connection_with_retries!(retry_interval:, retry_timeout:)
63
74
  started_at = Time.now
64
75
 
@@ -75,6 +86,10 @@ module App
75
86
  else
76
87
  raise UnhealthyCluster, "Unexpected cluster status: #{response['status']}"
77
88
  end
89
+ rescue *Utility::AUTHORIZATION_ERRORS => e
90
+ Utility::ExceptionTracking.log_exception(e)
91
+
92
+ fail_check!("Elasticsearch returned 'Unauthorized' response. Check your authentication details. Terminating...")
78
93
  rescue *App::RETRYABLE_CONNECTION_ERRORS => e
79
94
  Utility::Logger.warn('Could not connect to Elasticsearch. Make sure it is running and healthy.')
80
95
  Utility::Logger.debug("Error: #{e.full_message}")
@@ -7,10 +7,11 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'bson'
10
- require 'core/output_sink'
11
- require 'utility/exception_tracking'
12
- require 'utility/errors'
10
+ require 'core/ingestion'
11
+ require 'utility'
12
+ require 'utility/filtering'
13
13
  require 'app/config'
14
+ require 'active_support/core_ext/hash/indifferent_access'
14
15
 
15
16
  module Connectors
16
17
  module Base
@@ -19,6 +20,11 @@ module Connectors
19
20
  raise 'Not implemented for this connector'
20
21
  end
21
22
 
23
+ # Used as a framework util method, don't override
24
+ def self.configurable_fields_indifferent_access
25
+ configurable_fields.with_indifferent_access
26
+ end
27
+
22
28
  def self.configurable_fields
23
29
  {}
24
30
  end
@@ -27,8 +33,27 @@ module Connectors
27
33
  raise 'Not implemented for this connector'
28
34
  end
29
35
 
30
- def initialize(configuration: {})
36
+ def self.kibana_features
37
+ [
38
+ Utility::Constants::FILTERING_RULES_FEATURE,
39
+ Utility::Constants::FILTERING_ADVANCED_FEATURE
40
+ ]
41
+ end
42
+
43
+ def self.validate_filtering(_filtering = {})
44
+ raise 'Not implemented for this connector'
45
+ end
46
+
47
+ attr_reader :rules, :advanced_filter_config
48
+
49
+ def initialize(configuration: {}, job_description: {})
31
50
  @configuration = configuration.dup || {}
51
+ @job_description = job_description&.dup || {}
52
+
53
+ filtering = Utility::Filtering.extract_filter(@job_description.dig(:connector, :filtering))
54
+
55
+ @rules = filtering[:rules] || []
56
+ @advanced_filter_config = filtering[:advanced_snippet] || {}
32
57
  end
33
58
 
34
59
  def yield_documents; end
@@ -52,6 +77,14 @@ module Connectors
52
77
  Utility::ExceptionTracking.log_exception(e, "Connector for service #{self.class.service_type} failed the health check for 3rd-party service.")
53
78
  false
54
79
  end
80
+
81
+ def filtering_present?
82
+ @advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
83
+ end
84
+
85
+ def metadata
86
+ {}
87
+ end
55
88
  end
56
89
  end
57
90
  end
@@ -0,0 +1,42 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+ # frozen_string_literal: true
7
+
8
+ require 'active_support/core_ext/hash/indifferent_access'
9
+ require 'active_support/core_ext/object/blank'
10
+ require 'core/filtering/simple_rule'
11
+
12
+ module Connectors
13
+ module Base
14
+ class SimpleRulesParser
15
+ def initialize(rules)
16
+ @rules = (rules || []).map(&:with_indifferent_access).filter { |r| r[:id] != 'DEFAULT' }.sort_by { |r| r[:order] }
17
+ end
18
+
19
+ def parse
20
+ merge_rules(@rules.map do |rule_hash|
21
+ rule = Core::Filtering::SimpleRule.new(rule_hash)
22
+ unless rule.is_include? || rule.is_exclude?
23
+ raise "Unknown policy: #{rule.policy}"
24
+ end
25
+ parse_rule(rule)
26
+ end)
27
+ end
28
+
29
+ private
30
+
31
+ # merge all rules into a filter object or array
32
+ # in a base case, does no transformations
33
+ def merge_rules(rules)
34
+ rules || []
35
+ end
36
+
37
+ def parse_rule(_rule)
38
+ raise 'Not implemented'
39
+ end
40
+ end
41
+ end
42
+ end
@@ -8,11 +8,11 @@
8
8
 
9
9
  module Connectors
10
10
  class ConnectorStatus
11
- CREATED = 'created'
11
+ CREATED = 'created'
12
12
  NEEDS_CONFIGURATION = 'needs_configuration'
13
- CONFIGURED = 'configured'
14
- CONNECTED = 'connected'
15
- ERROR = 'error'
13
+ CONFIGURED = 'configured'
14
+ CONNECTED = 'connected'
15
+ ERROR = 'error'
16
16
 
17
17
  STATUSES = [
18
18
  CREATED,
@@ -7,6 +7,7 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
10
11
  require 'utility'
11
12
 
12
13
  module Connectors
@@ -20,16 +21,21 @@ module Connectors
20
21
  'Example Connector'
21
22
  end
22
23
 
24
+ # Field 'Foo' won't have a default value. Field 'Bar' will have the default value 'Value'.
23
25
  def self.configurable_fields
24
26
  {
25
27
  'foo' => {
26
28
  'label' => 'Foo',
27
29
  'value' => nil
30
+ },
31
+ :bar => {
32
+ :label => 'Bar',
33
+ :value => 'Value'
28
34
  }
29
35
  }
30
36
  end
31
37
 
32
- def initialize(configuration: {})
38
+ def initialize(configuration: {}, job_description: {})
33
39
  super
34
40
  end
35
41
 
@@ -40,18 +46,51 @@ module Connectors
40
46
  # raise 'something went wrong'
41
47
  end
42
48
 
49
+ def self.validate_filtering(filtering = {})
50
+ # TODO: real filtering validation will follow later
51
+ errors = [
52
+ {
53
+ :ids => ['missing-implementation'],
54
+ :messages => ['Filtering is not implemented yet for the example connector']
55
+ }
56
+ ]
57
+
58
+ return { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors } if filtering.present?
59
+
60
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
61
+ end
62
+
43
63
  def yield_documents
44
64
  attachments = [
45
- File.open('./lib/connectors/example/example_attachments/first_attachment.txt'),
46
- File.open('./lib/connectors/example/example_attachments/second_attachment.txt'),
47
- File.open('./lib/connectors/example/example_attachments/third_attachment.txt')
65
+ load_attachment('first_attachment.txt'),
66
+ load_attachment('second_attachment.txt'),
67
+ load_attachment('third_attachment.txt'),
48
68
  ]
49
69
 
50
70
  attachments.each_with_index do |att, index|
51
71
  data = { id: (index + 1).to_s, name: "example document #{index + 1}", _attachment: File.read(att) }
72
+
73
+ # Uncomment one of these two lines to simulate longer running sync jobs
74
+ #
75
+ # sleep(rand(10..60).seconds)
76
+ # sleep(rand(1..10).minutes)
77
+
52
78
  yield data
53
79
  end
54
80
  end
81
+
82
+ private
83
+
84
+ def load_attachment(path)
85
+ attachment_dir = "#{File.dirname(__FILE__)}/attachments"
86
+ attachment_path = "#{attachment_dir}/#{path}"
87
+
88
+ unless File.exist?(attachment_path)
89
+ raise "Attachment at location '#{attachment_path}' doesn't exist. Attachments should be located under #{attachment_dir}"
90
+ end
91
+
92
+ File.open(attachment_path)
93
+ end
55
94
  end
56
95
  end
57
96
  end
@@ -11,7 +11,7 @@ require 'connectors/base/connector'
11
11
  require 'connectors/gitlab/extractor'
12
12
  require 'connectors/gitlab/custom_client'
13
13
  require 'connectors/gitlab/adapter'
14
- require 'core/output_sink'
14
+ require 'core/ingestion'
15
15
 
16
16
  module Connectors
17
17
  module GitLab
@@ -36,7 +36,21 @@ module Connectors
36
36
  }
37
37
  end
38
38
 
39
- def initialize(configuration: {})
39
+ def self.validate_filtering(filtering = {})
40
+ # TODO: real filtering validation will follow later
41
+ errors = [
42
+ {
43
+ :ids => ['missing-implementation'],
44
+ :messages => ['Filtering is not implemented yet for the GitLab connector']
45
+ }
46
+ ]
47
+
48
+ return { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors } if filtering.present?
49
+
50
+ { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
51
+ end
52
+
53
+ def initialize(configuration: {}, job_description: {})
40
54
  super
41
55
 
42
56
  @extractor = Connectors::GitLab::Extractor.new(
@@ -6,13 +6,20 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'active_support/core_ext/hash/indifferent_access'
10
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
11
+ require 'connectors/mongodb/mongo_rules_parser'
11
12
  require 'mongo'
13
+ require 'utility'
12
14
 
13
15
  module Connectors
14
16
  module MongoDB
15
17
  class Connector < Connectors::Base::Connector
18
+
19
+ ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
20
+
21
+ PAGE_SIZE = 100
22
+
16
23
  def self.service_type
17
24
  'mongodb'
18
25
  end
@@ -23,28 +30,47 @@ module Connectors
23
30
 
24
31
  def self.configurable_fields
25
32
  {
26
- :host => {
27
- :label => 'Server Hostname'
28
- },
29
- :user => {
30
- :label => 'Username'
31
- },
32
- :password => {
33
- :label => 'Password'
34
- },
35
- :database => {
36
- :label => 'Database'
37
- },
38
- :collection => {
39
- :label => 'Collection'
40
- },
41
- :direct_connection => {
42
- :label => 'Direct connection? (true/false)'
43
- }
33
+ :host => {
34
+ :label => 'Server Hostname'
35
+ },
36
+ :user => {
37
+ :label => 'Username'
38
+ },
39
+ :password => {
40
+ :label => 'Password'
41
+ },
42
+ :database => {
43
+ :label => 'Database'
44
+ },
45
+ :collection => {
46
+ :label => 'Collection'
47
+ },
48
+ :direct_connection => {
49
+ :label => 'Direct connection? (true/false)'
50
+ }
44
51
  }
45
52
  end
46
53
 
47
- def initialize(configuration: {})
54
+ def self.validate_filtering(filtering = {})
55
+ valid_filtering = { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
56
+
57
+ return valid_filtering unless filtering.present?
58
+
59
+ filter = Utility::Filtering.extract_filter(filtering)
60
+
61
+ advanced_filter_config = filter[:advanced_snippet] || {}
62
+ filter_keys = advanced_filter_config&.keys
63
+
64
+ if !filter_keys&.empty? && (filter_keys.size != 1 || !ALLOWED_TOP_LEVEL_FILTER_KEYS.include?(filter_keys[0]&.to_s))
65
+ return { :state => Core::Filtering::ValidationStatus::INVALID,
66
+ :errors => [{ :ids => ['wrong-keys'],
67
+ :messages => ["Only one of #{ALLOWED_TOP_LEVEL_FILTER_KEYS} is allowed in the filtering object. Keys present: '#{filter_keys}'."] }] }
68
+ end
69
+
70
+ valid_filtering
71
+ end
72
+
73
+ def initialize(configuration: {}, job_description: {})
48
74
  super
49
75
 
50
76
  @host = configuration.dig(:host, :value)
@@ -57,16 +83,105 @@ module Connectors
57
83
 
58
84
  def yield_documents
59
85
  with_client do |client|
60
- client[@collection].find.each do |document|
61
- doc = document.with_indifferent_access
86
+ # We do paging using skip().limit() here to make Ruby recycle the memory for each page pulled from the server after it's not needed any more.
87
+ # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
88
+ # It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
89
+ # during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
90
+ cursor, options = create_db_cursor_on_collection(client[@collection])
91
+ skip = 0
92
+
93
+ found_overall = 0
94
+
95
+ # if no overall limit is specified by filtering use -1 to not break ingestion, when no overall limit is specified (found_overall is only increased,
96
+ # thus can never reach -1)
97
+ overall_limit = Float::INFINITY
98
+
99
+ if options.present?
100
+ # there could be a skip parameter defined for filtering
101
+ skip = options.fetch(:skip, skip)
102
+ # there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
103
+ overall_limit = options.fetch(:limit, overall_limit)
104
+ end
62
105
 
63
- yield serialize(doc)
106
+ overall_limit_reached = false
107
+
108
+ loop do
109
+ found_in_page = 0
110
+
111
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
112
+ view = cursor.skip(skip).limit(PAGE_SIZE)
113
+ view.each do |document|
114
+ yield serialize(document)
115
+
116
+ found_in_page += 1
117
+ found_overall += 1
118
+
119
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
120
+
121
+ break if overall_limit_reached
122
+ end
123
+
124
+ page_was_empty = found_in_page == 0
125
+
126
+ break if page_was_empty || overall_limit_reached
127
+
128
+ skip += PAGE_SIZE
64
129
  end
65
130
  end
66
131
  end
67
132
 
68
133
  private
69
134
 
135
+ def create_db_cursor_on_collection(collection)
136
+ return create_find_cursor(collection) if @advanced_filter_config[:find].present?
137
+
138
+ return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
139
+
140
+ return create_simple_rules_cursor(collection) if @rules.present?
141
+
142
+ collection.find
143
+ end
144
+
145
+ def create_aggregate_cursor(collection)
146
+ aggregate = @advanced_filter_config[:aggregate]
147
+
148
+ pipeline = aggregate[:pipeline]
149
+ options = extract_options(aggregate)
150
+
151
+ if !pipeline.nil? && pipeline.empty? && !options.present?
152
+ Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
153
+ end
154
+
155
+ [collection.aggregate(pipeline, options), options]
156
+ end
157
+
158
+ def create_find_cursor(collection)
159
+ find = @advanced_filter_config[:find]
160
+
161
+ filter = find[:filter]
162
+ options = extract_options(find)
163
+
164
+ if !filter.nil? && filter.empty? && !options.present?
165
+ Utility::Logger.warn('\'Find\' was specified with an empty filter and empty options.')
166
+ end
167
+
168
+ [collection.find(filter, options), options]
169
+ end
170
+
171
+ def create_simple_rules_cursor(collection)
172
+ filter = {}
173
+ if @rules.present?
174
+ parser = MongoRulesParser.new(@rules)
175
+ filter = parser.parse
176
+ end
177
+ Utility::Logger.info("Filtering with simple rules filter: #{filter}")
178
+ filter.present? ? collection.find(filter) : collection.find
179
+ end
180
+
181
+ def extract_options(mongodb_function)
182
+ mongodb_function[:options].present? ? mongodb_function[:options] : {}
183
+ end
184
+
70
185
  def do_health_check
71
186
  with_client do |_client|
72
187
  Utility::Logger.debug("Mongo at #{@host}/#{@database} looks healthy.")
@@ -76,34 +191,43 @@ module Connectors
76
191
  def with_client
77
192
  raise "Invalid value for 'Direct connection' : #{@direct_connection}." unless %w[true false].include?(@direct_connection.to_s.strip.downcase)
78
193
 
79
- client = if @user.present? || @password.present?
80
- Mongo::Client.new(
81
- @host,
82
- database: @database,
83
- direct_connection: to_boolean(@direct_connection),
84
- user: @user,
85
- password: @password
86
- )
87
- else
88
- Mongo::Client.new(
89
- @host,
90
- database: @database,
91
- direct_connection: to_boolean(@direct_connection)
92
- )
93
- end
94
-
95
- begin
96
- Utility::Logger.debug("Existing Databases #{client.database_names}")
97
- Utility::Logger.debug('Existing Collections:')
98
-
99
- client.collections.each { |coll| Utility::Logger.debug(coll.name) }
194
+ args = {
195
+ database: @database,
196
+ direct_connection: to_boolean(@direct_connection)
197
+ }
198
+
199
+ if @user.present? || @password.present?
200
+ args[:user] = @user
201
+ args[:password] = @password
202
+ end
203
+
204
+ Mongo::Client.new(@host, args) do |client|
205
+ databases = client.database_names
206
+
207
+ Utility::Logger.debug("Existing Databases: #{databases}")
208
+ check_database_exists!(databases, @database)
209
+
210
+ collections = client.database.collection_names
211
+
212
+ Utility::Logger.debug("Existing Collections: #{collections}")
213
+ check_collection_exists!(collections, @database, @collection)
100
214
 
101
215
  yield client
102
- ensure
103
- client.close
104
216
  end
105
217
  end
106
218
 
219
+ def check_database_exists!(databases, database)
220
+ return if databases.include?(database)
221
+
222
+ raise "Database (#{database}) does not exist. Existing databases: #{databases.join(', ')}"
223
+ end
224
+
225
+ def check_collection_exists!(collections, database, collection)
226
+ return if collections.include?(collection)
227
+
228
+ raise "Collection (#{collection}) does not exist within database '#{database}'. Existing collections: #{collections.join(', ')}"
229
+ end
230
+
107
231
  def serialize(mongodb_document)
108
232
  # This is some lazy serialization here.
109
233
  # Problem: MongoDB has its own format of things - e.g. ids are Bson::ObjectId, which when serialized to JSON
@@ -120,11 +244,10 @@ module Connectors
120
244
  mongodb_document.map { |v| serialize(v) }
121
245
  when Hash
122
246
  mongodb_document.map do |key, value|
123
- remapped_key = key.to_sym == :_id ? :id : key.to_sym
124
-
247
+ key = 'id' if key == '_id'
125
248
  remapped_value = serialize(value)
126
- [remapped_key, remapped_value]
127
- end.to_h.with_indifferent_access
249
+ [key, remapped_value]
250
+ end.to_h
128
251
  else
129
252
  mongodb_document
130
253
  end