connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -8
- data/lib/app/config.rb +6 -1
- data/lib/app/console_app.rb +1 -1
- data/lib/app/dispatcher.rb +18 -3
- data/lib/connectors/base/connector.rb +39 -22
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/connectors/example/connector.rb +2 -2
- data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
- data/lib/connectors/gitlab/connector.rb +4 -4
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
- data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
- data/lib/connectors/mongodb/connector.rb +66 -56
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
- data/lib/connectors/registry.rb +1 -1
- data/lib/connectors/tolerable_error_helper.rb +5 -1
- data/lib/connectors_utility.rb +6 -3
- data/lib/core/configuration.rb +13 -1
- data/lib/core/connector_job.rb +48 -7
- data/lib/core/connector_settings.rb +52 -20
- data/lib/core/elastic_connector_actions.rb +54 -38
- data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
- data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
- data/lib/core/filtering/filter_validator.rb +103 -0
- data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
- data/lib/core/filtering/post_process_engine.rb +2 -2
- data/lib/core/filtering/processing_stage.rb +20 -0
- data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
- data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
- data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
- data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
- data/lib/core/filtering/transform/filter_transformer.rb +26 -0
- data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
- data/lib/core/filtering/transform/transformation_target.rb +10 -0
- data/lib/core/filtering/validation_job_runner.rb +1 -3
- data/lib/core/filtering.rb +5 -3
- data/lib/core/job_cleanup.rb +66 -0
- data/lib/core/jobs/consumer.rb +62 -64
- data/lib/core/jobs/producer.rb +3 -0
- data/lib/core/scheduler.rb +67 -52
- data/lib/core/sync_job_runner.rb +170 -83
- data/lib/core.rb +1 -0
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/error_monitor.rb +26 -5
- data/lib/utility/es_client.rb +4 -0
- data/lib/utility/filtering.rb +4 -0
- metadata +32 -21
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c2c9d24a514189f2efb9fbfdcba52a2ee49d40dec97025b6524e1562087e750
|
4
|
+
data.tar.gz: f3ea6369ff7cbab5983178df08d5aa168f72cd66325c23c5dd6fe839688db1f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b80a16d7a1d60fe19d1ac95d9cd8a764629b256632fc205342513654fbbceae4b70d73f4f8ab8b2be808ed6d36f7d62773a32326a01e7579a15b6d4dbdc00f0
|
7
|
+
data.tar.gz: cfe21c6e49e6a95af8820a2a89b5dd6dc1b5b080cf174dd90c575bc5b390047334b27a4df7f01a3c26d19990162758b4f82cdb17ad01d048057bd62a3ded8863
|
data/config/connectors.yml
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
# general metadata
|
2
|
-
version: 8.
|
3
|
-
repository:
|
4
|
-
revision:
|
2
|
+
version: 8.11.0.0
|
3
|
+
repository: git@github.com:elastic/connectors-ruby.git
|
4
|
+
revision: b6c38305e5f7297064f4cf1cbda20f892483f267
|
5
5
|
elasticsearch:
|
6
|
-
cloud_id: CHANGEME
|
7
6
|
hosts: http://localhost:9200
|
8
|
-
api_key:
|
7
|
+
api_key: QWFHdV9JY0JDZDEzMU8tU3N1Y2Q6YXRyLTV5SlBTUENKYXcta1Yxc0RVZw==
|
9
8
|
retry_on_failure: 3
|
10
9
|
request_timeout: 120
|
11
10
|
disable_warnings: true
|
@@ -16,10 +15,13 @@ thread_pool:
|
|
16
15
|
max_threads: 5
|
17
16
|
max_queue: 100
|
18
17
|
log_level: info
|
19
|
-
ecs_logging:
|
18
|
+
ecs_logging: false
|
20
19
|
poll_interval: 3
|
21
20
|
termination_timeout: 60
|
22
21
|
heartbeat_interval: 1800
|
22
|
+
job_cleanup_interval: 300
|
23
23
|
native_mode: true
|
24
|
-
connector_id:
|
25
|
-
service_type:
|
24
|
+
connector_id: _6Go_IcBCd131O-Ss-YH
|
25
|
+
service_type: mongodb
|
26
|
+
max_ingestion_queue_size: 500
|
27
|
+
max_ingestion_queue_bytes: 5242880
|
data/lib/app/config.rb
CHANGED
@@ -37,6 +37,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
37
37
|
optional(:log).value(:bool?)
|
38
38
|
optional(:ca_fingerprint).value(:string)
|
39
39
|
optional(:transport_options).value(:hash)
|
40
|
+
optional(:headers).value(:hash)
|
40
41
|
end
|
41
42
|
|
42
43
|
optional(:thread_pool).hash do
|
@@ -54,6 +55,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
54
55
|
optional(:poll_interval).value(:integer)
|
55
56
|
optional(:termination_timeout).value(:integer)
|
56
57
|
optional(:heartbeat_interval).value(:integer)
|
58
|
+
optional(:job_cleanup_interval).value(:integer)
|
57
59
|
|
58
60
|
optional(:max_ingestion_queue_size).value(:integer) # items
|
59
61
|
optional(:max_ingestion_queue_bytes).value(:integer) # bytes
|
@@ -111,6 +113,8 @@ module App
|
|
111
113
|
return nil
|
112
114
|
end
|
113
115
|
|
116
|
+
headers = ent_search_config['elasticsearch.headers'] || ent_search_config.dig('elasticsearch', 'headers')
|
117
|
+
|
114
118
|
{
|
115
119
|
:hosts => [
|
116
120
|
{
|
@@ -120,7 +124,8 @@ module App
|
|
120
124
|
host: uri.host,
|
121
125
|
port: uri.port
|
122
126
|
}
|
123
|
-
]
|
127
|
+
],
|
128
|
+
:headers => headers
|
124
129
|
}
|
125
130
|
end
|
126
131
|
|
data/lib/app/console_app.rb
CHANGED
@@ -89,7 +89,7 @@ module App
|
|
89
89
|
def enable_scheduling
|
90
90
|
return unless connector_registered?
|
91
91
|
|
92
|
-
previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.
|
92
|
+
previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.full_sync_scheduling&.fetch(:interval, nil)
|
93
93
|
if previous_schedule.present?
|
94
94
|
puts "Please enter a valid crontab expression for scheduling. Previous schedule was: #{previous_schedule}."
|
95
95
|
else
|
data/lib/app/dispatcher.rb
CHANGED
@@ -21,6 +21,7 @@ module App
|
|
21
21
|
MIN_THREADS = (App::Config.dig(:thread_pool, :min_threads) || 0).to_i
|
22
22
|
MAX_THREADS = (App::Config.dig(:thread_pool, :max_threads) || 5).to_i
|
23
23
|
MAX_QUEUE = (App::Config.dig(:thread_pool, :max_queue) || 100).to_i
|
24
|
+
JOB_CLEANUP_INTERVAL = (App::Config.job_cleanup_interval || 60 * 5).to_i
|
24
25
|
|
25
26
|
@running = Concurrent::AtomicBoolean.new(false)
|
26
27
|
|
@@ -28,16 +29,16 @@ module App
|
|
28
29
|
def start!
|
29
30
|
running!
|
30
31
|
Utility::Logger.info("Starting connector service in #{App::Config.native_mode ? 'native' : 'non-native'} mode...")
|
31
|
-
|
32
|
+
start_job_cleanup_task!
|
32
33
|
# start sync jobs consumer
|
33
34
|
start_consumer!
|
34
|
-
|
35
35
|
start_polling_jobs!
|
36
36
|
end
|
37
37
|
|
38
38
|
def shutdown!
|
39
39
|
Utility::Logger.info("Shutting down connector service with pool [#{pool.class}]...")
|
40
40
|
running.make_false
|
41
|
+
job_cleanup_timer.shutdown
|
41
42
|
scheduler.shutdown
|
42
43
|
pool.shutdown
|
43
44
|
pool.wait_for_termination(TERMINATION_TIMEOUT)
|
@@ -70,11 +71,25 @@ module App
|
|
70
71
|
end
|
71
72
|
end
|
72
73
|
|
74
|
+
def job_cleanup_timer
|
75
|
+
@job_cleanup_timer ||= Concurrent::TimerTask.new(:execution_interval => JOB_CLEANUP_INTERVAL, :run_now => true) do
|
76
|
+
connector_id = App::Config.native_mode ? nil : App::Config.connector_id
|
77
|
+
Core::JobCleanUp.execute(connector_id)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def start_job_cleanup_task!
|
82
|
+
job_cleanup_timer.execute
|
83
|
+
end
|
84
|
+
|
73
85
|
def start_polling_jobs!
|
74
86
|
scheduler.when_triggered do |connector_settings, task|
|
75
87
|
case task
|
76
88
|
when :sync
|
77
|
-
#
|
89
|
+
# TODO: #update_connector_sync_now should be moved to Core::ConnectorSettings,
|
90
|
+
# there should not be any business logic related code in Core::ElasticConnectorActions.
|
91
|
+
# #update_connector_sync_now should not update `last_synced` after https://github.com/elastic/enterprise-search-team/issues/3366 is resolved,
|
92
|
+
# schedule should not be based on `last_synced`
|
78
93
|
Core::ElasticConnectorActions.update_connector_sync_now(connector_settings.id, false)
|
79
94
|
|
80
95
|
Core::Jobs::Producer.enqueue_job(job_type: :sync, connector_settings: connector_settings)
|
@@ -9,9 +9,15 @@
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'app/config'
|
11
11
|
require 'bson'
|
12
|
-
require 'connectors/base/advanced_snippet_validator'
|
13
12
|
require 'core/ingestion'
|
14
13
|
require 'connectors/tolerable_error_helper'
|
14
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
15
|
+
require 'core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator'
|
16
|
+
require 'core/filtering/simple_rules/validation/single_rule_against_schema_validator'
|
17
|
+
require 'core/filtering/transform/filter_transformer_facade'
|
18
|
+
require 'core/filtering/transform/transformation_target'
|
19
|
+
require 'core/filtering/filter_validator'
|
20
|
+
require 'core/filtering/processing_stage'
|
15
21
|
require 'core/filtering/validation_status'
|
16
22
|
require 'utility'
|
17
23
|
require 'utility/filtering'
|
@@ -38,40 +44,55 @@ module Connectors
|
|
38
44
|
|
39
45
|
def self.kibana_features
|
40
46
|
[
|
41
|
-
|
42
|
-
|
47
|
+
{ :feature => :sync_rules, :subfeature => :basic, :enabled => true },
|
48
|
+
{ :feature => :sync_rules, :subfeature => :advanced, :enabled => true }
|
43
49
|
]
|
44
50
|
end
|
45
51
|
|
46
|
-
def self.
|
47
|
-
AdvancedSnippetValidator
|
52
|
+
def self.advanced_snippet_validators
|
53
|
+
Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
48
54
|
end
|
49
55
|
|
50
|
-
def self.
|
51
|
-
|
52
|
-
|
56
|
+
def self.simple_rules_validators
|
57
|
+
{
|
58
|
+
Core::Filtering::ProcessingStage::ALL => [
|
59
|
+
Core::Filtering::SimpleRules::Validation::SingleRuleAgainstSchemaValidator,
|
60
|
+
Core::Filtering::SimpleRules::Validation::NoConflictingPoliciesRulesValidator
|
61
|
+
]
|
62
|
+
}
|
63
|
+
end
|
53
64
|
|
54
|
-
|
55
|
-
|
65
|
+
def self.filter_transformers
|
66
|
+
{
|
67
|
+
Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [],
|
68
|
+
Core::Filtering::Transform::TransformationTarget::RULES => []
|
69
|
+
}
|
70
|
+
end
|
56
71
|
|
57
|
-
|
72
|
+
def self.validate_filtering(filtering = {})
|
73
|
+
filter = Utility::Filtering.extract_filter(filtering)
|
58
74
|
|
59
|
-
|
75
|
+
filter_validator = Core::Filtering::FilterValidator.new(snippet_validator_classes: advanced_snippet_validators,
|
76
|
+
rules_validator_classes: simple_rules_validators,
|
77
|
+
rules_pre_processing_active: Utility::Filtering.rule_pre_processing_active?(filter))
|
78
|
+
filter_validator.is_filter_valid(filter)
|
60
79
|
end
|
61
80
|
|
62
81
|
attr_reader :rules, :advanced_filter_config
|
63
82
|
|
64
|
-
def initialize(configuration: {}, job_description:
|
83
|
+
def initialize(configuration: {}, job_description: nil)
|
65
84
|
error_monitor = Utility::ErrorMonitor.new
|
66
85
|
@tolerable_error_helper = Connectors::TolerableErrorHelper.new(error_monitor)
|
67
86
|
|
68
|
-
@configuration = configuration
|
69
|
-
@job_description = job_description&.dup
|
87
|
+
@configuration = job_description&.configuration&.dup || configuration&.dup || {}
|
88
|
+
@job_description = job_description&.dup
|
70
89
|
|
71
|
-
|
90
|
+
filter = Utility::Filtering.extract_filter(@job_description&.filtering)
|
91
|
+
filter = Core::Filtering::Transform::FilterTransformerFacade.new(filter, self.class.filter_transformers).transform
|
72
92
|
|
73
|
-
@rules =
|
74
|
-
|
93
|
+
@rules = filter[:rules] || []
|
94
|
+
# regression bug, we need to keep indifferent access here until we get rid of symbols in the connectors
|
95
|
+
@advanced_filter_config = filter[:advanced_snippet]&.with_indifferent_access || {}
|
75
96
|
end
|
76
97
|
|
77
98
|
def yield_documents; end
|
@@ -100,10 +121,6 @@ module Connectors
|
|
100
121
|
false
|
101
122
|
end
|
102
123
|
|
103
|
-
def filtering_present?
|
104
|
-
@advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
|
105
|
-
end
|
106
|
-
|
107
124
|
def metadata
|
108
125
|
{}
|
109
126
|
end
|
@@ -22,11 +22,47 @@ module Connectors
|
|
22
22
|
[]
|
23
23
|
end
|
24
24
|
|
25
|
+
def when_triggered
|
26
|
+
loop do
|
27
|
+
connector_settings.each do |cs|
|
28
|
+
# crawler only supports :sync
|
29
|
+
if sync_triggered?(cs)
|
30
|
+
yield cs, :sync, nil
|
31
|
+
next
|
32
|
+
end
|
33
|
+
|
34
|
+
schedule_key = custom_schedule_triggered(cs)
|
35
|
+
yield cs, :sync, schedule_key if schedule_key
|
36
|
+
end
|
37
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
38
|
+
log_authorization_error(e)
|
39
|
+
rescue StandardError => e
|
40
|
+
log_standard_error(e)
|
41
|
+
ensure
|
42
|
+
if @is_shutting_down
|
43
|
+
break
|
44
|
+
end
|
45
|
+
sleep_for_poll_interval
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
25
49
|
private
|
26
50
|
|
27
51
|
def connector_registered?(service_type)
|
28
52
|
service_type == 'elastic-crawler'
|
29
53
|
end
|
54
|
+
|
55
|
+
# custom scheduling has no ordering, so the first-found schedule is returned
|
56
|
+
def custom_schedule_triggered(cs)
|
57
|
+
cs.custom_scheduling_settings.each do |key, custom_scheduling|
|
58
|
+
identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
|
59
|
+
if schedule_triggered?(custom_scheduling, identifier)
|
60
|
+
return key
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
nil
|
65
|
+
end
|
30
66
|
end
|
31
67
|
end
|
32
68
|
end
|
@@ -36,7 +36,7 @@ module Connectors
|
|
36
36
|
}
|
37
37
|
end
|
38
38
|
|
39
|
-
def initialize(configuration: {}, job_description:
|
39
|
+
def initialize(configuration: {}, job_description: nil)
|
40
40
|
super
|
41
41
|
end
|
42
42
|
|
@@ -47,7 +47,7 @@ module Connectors
|
|
47
47
|
# raise 'something went wrong'
|
48
48
|
end
|
49
49
|
|
50
|
-
def self.
|
50
|
+
def self.advanced_snippet_validators
|
51
51
|
ExampleAdvancedSnippetValidator
|
52
52
|
end
|
53
53
|
|
@@ -6,13 +6,14 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
10
|
+
require 'core/filtering/validation_status'
|
10
11
|
|
11
12
|
module Connectors
|
12
13
|
module Example
|
13
|
-
class ExampleAdvancedSnippetValidator <
|
14
|
+
class ExampleAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
14
15
|
|
15
|
-
def is_snippet_valid
|
16
|
+
def is_snippet_valid
|
16
17
|
# TODO: real filtering validation will follow later
|
17
18
|
errors = [
|
18
19
|
{
|
@@ -37,16 +37,16 @@ module Connectors
|
|
37
37
|
}
|
38
38
|
end
|
39
39
|
|
40
|
-
def self.
|
40
|
+
def self.advanced_snippet_validators
|
41
41
|
GitLabAdvancedSnippetValidator
|
42
42
|
end
|
43
43
|
|
44
|
-
def initialize(configuration: {}, job_description:
|
44
|
+
def initialize(configuration: {}, job_description: nil)
|
45
45
|
super
|
46
46
|
|
47
47
|
@extractor = Connectors::GitLab::Extractor.new(
|
48
|
-
:base_url => configuration.dig(:base_url, :value),
|
49
|
-
:api_token => configuration.dig(:api_token, :value)
|
48
|
+
:base_url => @configuration.dig(:base_url, :value),
|
49
|
+
:api_token => @configuration.dig(:api_token, :value)
|
50
50
|
)
|
51
51
|
end
|
52
52
|
|
@@ -6,13 +6,13 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
10
10
|
|
11
11
|
module Connectors
|
12
12
|
module GitLab
|
13
|
-
class GitLabAdvancedSnippetValidator <
|
13
|
+
class GitLabAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
14
14
|
|
15
|
-
def is_snippet_valid
|
15
|
+
def is_snippet_valid
|
16
16
|
# TODO: real filtering validation will follow later
|
17
17
|
errors = [
|
18
18
|
{
|
@@ -21,13 +21,11 @@ module Connectors
|
|
21
21
|
}
|
22
22
|
]
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
log_validation_result(validation_result)
|
30
|
-
validation_result
|
24
|
+
if @advanced_snippet.present? && !@advanced_snippet.empty?
|
25
|
+
{ :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
|
26
|
+
else
|
27
|
+
{ :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
|
28
|
+
end
|
31
29
|
end
|
32
30
|
|
33
31
|
end
|
@@ -4,10 +4,11 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
|
10
|
+
class JobTriggerMethod
|
11
|
+
ON_DEMAND = 'on_demand'
|
12
|
+
SCHEDULED = 'scheduled'
|
12
13
|
end
|
13
14
|
end
|
@@ -8,7 +8,8 @@
|
|
8
8
|
|
9
9
|
require 'connectors/base/connector'
|
10
10
|
require 'core/filtering/validation_status'
|
11
|
-
require '
|
11
|
+
require 'core/filtering/transform/transformation_target'
|
12
|
+
require 'connectors/mongodb/mongo_advanced_snippet_snake_case_transformer'
|
12
13
|
require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
|
13
14
|
require 'mongo'
|
14
15
|
require 'utility'
|
@@ -19,6 +20,9 @@ module Connectors
|
|
19
20
|
|
20
21
|
ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
|
21
22
|
|
23
|
+
AGGREGATE = 'aggregate'
|
24
|
+
FIND = 'find'
|
25
|
+
|
22
26
|
PAGE_SIZE = 100
|
23
27
|
|
24
28
|
def self.service_type
|
@@ -52,19 +56,25 @@ module Connectors
|
|
52
56
|
}
|
53
57
|
end
|
54
58
|
|
55
|
-
def self.
|
59
|
+
def self.advanced_snippet_validators
|
56
60
|
MongoAdvancedSnippetAgainstSchemaValidator
|
57
61
|
end
|
58
62
|
|
59
|
-
def
|
63
|
+
def self.filter_transformers
|
64
|
+
{
|
65
|
+
Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [MongoAdvancedSnippetSnakeCaseTransformer]
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
def initialize(configuration: {}, job_description: nil)
|
60
70
|
super
|
61
71
|
|
62
|
-
@host = configuration.dig(:host, :value)
|
63
|
-
@database = configuration.dig(:database, :value)
|
64
|
-
@collection = configuration.dig(:collection, :value)
|
65
|
-
@user = configuration.dig(:user, :value)
|
66
|
-
@password = configuration.dig(:password, :value)
|
67
|
-
@direct_connection = configuration.dig(:direct_connection, :value)
|
72
|
+
@host = @configuration.dig(:host, :value)
|
73
|
+
@database = @configuration.dig(:database, :value)
|
74
|
+
@collection = @configuration.dig(:collection, :value)
|
75
|
+
@user = @configuration.dig(:user, :value)
|
76
|
+
@password = @configuration.dig(:password, :value)
|
77
|
+
@direct_connection = @configuration.dig(:direct_connection, :value)
|
68
78
|
end
|
69
79
|
|
70
80
|
def yield_documents
|
@@ -73,44 +83,56 @@ module Connectors
|
|
73
83
|
# This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
|
74
84
|
# It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
|
75
85
|
# during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
86
|
+
cursor_type, cursor_with_options = create_db_cursor_on_collection(client[@collection])
|
87
|
+
cursor, options = cursor_with_options
|
88
|
+
|
89
|
+
case cursor_type
|
90
|
+
when FIND
|
91
|
+
skip = 0
|
92
|
+
found_overall = 0
|
93
|
+
|
94
|
+
# if no overall limit is specified by filtering, use Float::INFINITY so ingestion is never cut short (found_overall is only increased,
|
95
|
+
# and can therefore never reach it)
|
96
|
+
overall_limit = Float::INFINITY
|
97
|
+
|
98
|
+
if options.present?
|
99
|
+
# there could be a skip parameter defined for filtering
|
100
|
+
skip = options.fetch(:skip, skip)
|
101
|
+
# there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
|
102
|
+
overall_limit = options.fetch(:limit, overall_limit)
|
103
|
+
end
|
84
104
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
105
|
+
overall_limit_reached = false
|
106
|
+
|
107
|
+
loop do
|
108
|
+
found_in_page = 0
|
109
|
+
|
110
|
+
Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
|
111
|
+
view = cursor.skip(skip).limit(PAGE_SIZE)
|
112
|
+
view.each do |document|
|
113
|
+
yield_with_handling_tolerable_errors do
|
114
|
+
yield serialize(document)
|
115
|
+
found_in_page += 1
|
116
|
+
found_overall += 1
|
117
|
+
overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
|
118
|
+
end
|
119
|
+
break if overall_limit_reached
|
120
|
+
end
|
91
121
|
|
92
|
-
|
122
|
+
page_was_empty = found_in_page == 0
|
93
123
|
|
94
|
-
|
95
|
-
found_in_page = 0
|
124
|
+
break if page_was_empty || overall_limit_reached
|
96
125
|
|
97
|
-
|
98
|
-
|
99
|
-
|
126
|
+
skip += PAGE_SIZE
|
127
|
+
end
|
128
|
+
when AGGREGATE
|
129
|
+
cursor.each do |document|
|
100
130
|
yield_with_handling_tolerable_errors do
|
101
131
|
yield serialize(document)
|
102
|
-
found_in_page += 1
|
103
|
-
found_overall += 1
|
104
|
-
overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
|
105
132
|
end
|
106
|
-
break if overall_limit_reached
|
107
133
|
end
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
break if page_was_empty || overall_limit_reached
|
112
|
-
|
113
|
-
skip += PAGE_SIZE
|
134
|
+
else
|
135
|
+
raise "Unknown retrieval function #{cursor_type} for MongoDB."
|
114
136
|
end
|
115
137
|
end
|
116
138
|
end
|
@@ -118,22 +140,20 @@ module Connectors
|
|
118
140
|
private
|
119
141
|
|
120
142
|
def create_db_cursor_on_collection(collection)
|
121
|
-
return
|
143
|
+
return [AGGREGATE, create_aggregate_cursor(collection)] if @advanced_filter_config[:aggregate].present?
|
122
144
|
|
123
|
-
return
|
145
|
+
return [FIND, create_find_cursor(collection)] if @advanced_filter_config[:find].present?
|
124
146
|
|
125
|
-
|
126
|
-
|
127
|
-
collection.find
|
147
|
+
[FIND, collection.find]
|
128
148
|
end
|
129
149
|
|
130
150
|
def create_aggregate_cursor(collection)
|
131
151
|
aggregate = @advanced_filter_config[:aggregate]
|
132
152
|
|
133
|
-
pipeline = aggregate[:pipeline]
|
153
|
+
pipeline = aggregate[:pipeline] || []
|
134
154
|
options = extract_options(aggregate)
|
135
155
|
|
136
|
-
if
|
156
|
+
if pipeline.empty? && options.empty?
|
137
157
|
Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
|
138
158
|
end
|
139
159
|
|
@@ -153,16 +173,6 @@ module Connectors
|
|
153
173
|
[collection.find(filter, options), options]
|
154
174
|
end
|
155
175
|
|
156
|
-
def create_simple_rules_cursor(collection)
|
157
|
-
filter = {}
|
158
|
-
if @rules.present?
|
159
|
-
parser = MongoRulesParser.new(@rules)
|
160
|
-
filter = parser.parse
|
161
|
-
end
|
162
|
-
Utility::Logger.info("Filtering with simple rules filter: #{filter}")
|
163
|
-
filter.present? ? collection.find(filter) : collection.find
|
164
|
-
end
|
165
|
-
|
166
176
|
def extract_options(mongodb_function)
|
167
177
|
mongodb_function[:options].present? ? mongodb_function[:options] : {}
|
168
178
|
end
|
@@ -6,12 +6,12 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_against_schema_validator'
|
10
10
|
require 'connectors/mongodb/mongo_advanced_snippet_schema'
|
11
11
|
|
12
12
|
module Connectors
|
13
13
|
module MongoDB
|
14
|
-
class MongoAdvancedSnippetAgainstSchemaValidator <
|
14
|
+
class MongoAdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetAgainstSchemaValidator
|
15
15
|
|
16
16
|
def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
|
17
17
|
super
|
@@ -25,7 +25,7 @@ module Connectors
|
|
25
25
|
NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
|
26
26
|
READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
|
27
27
|
STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
|
28
|
-
MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
|
28
|
+
MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.nil? || fields.size <= 1 }
|
29
29
|
|
30
30
|
AGGREGATION_PIPELINE = lambda { |pipeline|
|
31
31
|
return false unless pipeline.is_a?(Array)
|
@@ -265,7 +265,8 @@ module Connectors
|
|
265
265
|
|
266
266
|
FIND_FILTER = {
|
267
267
|
:name => 'filter',
|
268
|
-
:type => FILTER
|
268
|
+
:type => FILTER,
|
269
|
+
:optional => true
|
269
270
|
}
|
270
271
|
|
271
272
|
FIND = {
|