connectors_service 8.7.0.0.pre.20221117T010623Z → 8.11.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -8
- data/lib/app/config.rb +6 -1
- data/lib/app/console_app.rb +1 -1
- data/lib/app/dispatcher.rb +18 -3
- data/lib/connectors/base/connector.rb +39 -22
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/connectors/example/connector.rb +2 -2
- data/lib/connectors/example/example_advanced_snippet_validator.rb +4 -3
- data/lib/connectors/gitlab/connector.rb +4 -4
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +8 -10
- data/lib/{connectors_app/// → connectors/job_trigger_method.rb} +6 -5
- data/lib/connectors/mongodb/connector.rb +66 -56
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +2 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +3 -2
- data/lib/connectors/mongodb/mongo_advanced_snippet_snake_case_transformer.rb +49 -0
- data/lib/connectors/registry.rb +1 -1
- data/lib/connectors/tolerable_error_helper.rb +5 -1
- data/lib/connectors_utility.rb +6 -3
- data/lib/core/configuration.rb +13 -1
- data/lib/core/connector_job.rb +48 -7
- data/lib/core/connector_settings.rb +52 -20
- data/lib/core/elastic_connector_actions.rb +54 -38
- data/lib/core/filtering/advanced_snippet/advanced_snippet_against_schema_validator.rb +32 -0
- data/lib/core/filtering/advanced_snippet/advanced_snippet_validator.rb +27 -0
- data/lib/core/filtering/filter_validator.rb +103 -0
- data/lib/{connectors/base/advanced_snippet_against_schema_validator.rb → core/filtering/hash_against_schema_validator.rb} +58 -44
- data/lib/core/filtering/post_process_engine.rb +2 -2
- data/lib/core/filtering/processing_stage.rb +20 -0
- data/lib/core/filtering/{simple_rule.rb → simple_rules/simple_rule.rb} +34 -1
- data/lib/core/filtering/simple_rules/simple_rules_parser.rb +44 -0
- data/lib/core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator.rb +47 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_schema.rb +68 -0
- data/lib/core/filtering/simple_rules/validation/simple_rules_validator.rb +25 -0
- data/lib/core/filtering/simple_rules/validation/single_rule_against_schema_validator.rb +37 -0
- data/lib/core/filtering/transform/filter_transformer.rb +26 -0
- data/lib/core/filtering/transform/filter_transformer_facade.rb +61 -0
- data/lib/core/filtering/transform/transformation_target.rb +10 -0
- data/lib/core/filtering/validation_job_runner.rb +1 -3
- data/lib/core/filtering.rb +5 -3
- data/lib/core/job_cleanup.rb +66 -0
- data/lib/core/jobs/consumer.rb +62 -64
- data/lib/core/jobs/producer.rb +3 -0
- data/lib/core/scheduler.rb +67 -52
- data/lib/core/sync_job_runner.rb +170 -83
- data/lib/core.rb +1 -0
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/error_monitor.rb +26 -5
- data/lib/utility/es_client.rb +4 -0
- data/lib/utility/filtering.rb +4 -0
- metadata +32 -21
- data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
- data/lib/connectors/base/simple_rules_parser.rb +0 -42
- data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3c2c9d24a514189f2efb9fbfdcba52a2ee49d40dec97025b6524e1562087e750
|
|
4
|
+
data.tar.gz: f3ea6369ff7cbab5983178df08d5aa168f72cd66325c23c5dd6fe839688db1f8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2b80a16d7a1d60fe19d1ac95d9cd8a764629b256632fc205342513654fbbceae4b70d73f4f8ab8b2be808ed6d36f7d62773a32326a01e7579a15b6d4dbdc00f0
|
|
7
|
+
data.tar.gz: cfe21c6e49e6a95af8820a2a89b5dd6dc1b5b080cf174dd90c575bc5b390047334b27a4df7f01a3c26d19990162758b4f82cdb17ad01d048057bd62a3ded8863
|
data/config/connectors.yml
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
# general metadata
|
|
2
|
-
version: 8.
|
|
3
|
-
repository:
|
|
4
|
-
revision:
|
|
2
|
+
version: 8.11.0.0
|
|
3
|
+
repository: git@github.com:elastic/connectors-ruby.git
|
|
4
|
+
revision: b6c38305e5f7297064f4cf1cbda20f892483f267
|
|
5
5
|
elasticsearch:
|
|
6
|
-
cloud_id: CHANGEME
|
|
7
6
|
hosts: http://localhost:9200
|
|
8
|
-
api_key:
|
|
7
|
+
api_key: QWFHdV9JY0JDZDEzMU8tU3N1Y2Q6YXRyLTV5SlBTUENKYXcta1Yxc0RVZw==
|
|
9
8
|
retry_on_failure: 3
|
|
10
9
|
request_timeout: 120
|
|
11
10
|
disable_warnings: true
|
|
@@ -16,10 +15,13 @@ thread_pool:
|
|
|
16
15
|
max_threads: 5
|
|
17
16
|
max_queue: 100
|
|
18
17
|
log_level: info
|
|
19
|
-
ecs_logging:
|
|
18
|
+
ecs_logging: false
|
|
20
19
|
poll_interval: 3
|
|
21
20
|
termination_timeout: 60
|
|
22
21
|
heartbeat_interval: 1800
|
|
22
|
+
job_cleanup_interval: 300
|
|
23
23
|
native_mode: true
|
|
24
|
-
connector_id:
|
|
25
|
-
service_type:
|
|
24
|
+
connector_id: _6Go_IcBCd131O-Ss-YH
|
|
25
|
+
service_type: mongodb
|
|
26
|
+
max_ingestion_queue_size: 500
|
|
27
|
+
max_ingestion_queue_bytes: 5242880
|
data/lib/app/config.rb
CHANGED
|
@@ -37,6 +37,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
|
37
37
|
optional(:log).value(:bool?)
|
|
38
38
|
optional(:ca_fingerprint).value(:string)
|
|
39
39
|
optional(:transport_options).value(:hash)
|
|
40
|
+
optional(:headers).value(:hash)
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
optional(:thread_pool).hash do
|
|
@@ -54,6 +55,7 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
|
54
55
|
optional(:poll_interval).value(:integer)
|
|
55
56
|
optional(:termination_timeout).value(:integer)
|
|
56
57
|
optional(:heartbeat_interval).value(:integer)
|
|
58
|
+
optional(:job_cleanup_interval).value(:integer)
|
|
57
59
|
|
|
58
60
|
optional(:max_ingestion_queue_size).value(:integer) # items
|
|
59
61
|
optional(:max_ingestion_queue_bytes).value(:integer) # bytes
|
|
@@ -111,6 +113,8 @@ module App
|
|
|
111
113
|
return nil
|
|
112
114
|
end
|
|
113
115
|
|
|
116
|
+
headers = ent_search_config['elasticsearch.headers'] || ent_search_config.dig('elasticsearch', 'headers')
|
|
117
|
+
|
|
114
118
|
{
|
|
115
119
|
:hosts => [
|
|
116
120
|
{
|
|
@@ -120,7 +124,8 @@ module App
|
|
|
120
124
|
host: uri.host,
|
|
121
125
|
port: uri.port
|
|
122
126
|
}
|
|
123
|
-
]
|
|
127
|
+
],
|
|
128
|
+
:headers => headers
|
|
124
129
|
}
|
|
125
130
|
end
|
|
126
131
|
|
data/lib/app/console_app.rb
CHANGED
|
@@ -89,7 +89,7 @@ module App
|
|
|
89
89
|
def enable_scheduling
|
|
90
90
|
return unless connector_registered?
|
|
91
91
|
|
|
92
|
-
previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.
|
|
92
|
+
previous_schedule = Core::ConnectorSettings.fetch_by_id(connector_id)&.full_sync_scheduling&.fetch(:interval, nil)
|
|
93
93
|
if previous_schedule.present?
|
|
94
94
|
puts "Please enter a valid crontab expression for scheduling. Previous schedule was: #{previous_schedule}."
|
|
95
95
|
else
|
data/lib/app/dispatcher.rb
CHANGED
|
@@ -21,6 +21,7 @@ module App
|
|
|
21
21
|
MIN_THREADS = (App::Config.dig(:thread_pool, :min_threads) || 0).to_i
|
|
22
22
|
MAX_THREADS = (App::Config.dig(:thread_pool, :max_threads) || 5).to_i
|
|
23
23
|
MAX_QUEUE = (App::Config.dig(:thread_pool, :max_queue) || 100).to_i
|
|
24
|
+
JOB_CLEANUP_INTERVAL = (App::Config.job_cleanup_interval || 60 * 5).to_i
|
|
24
25
|
|
|
25
26
|
@running = Concurrent::AtomicBoolean.new(false)
|
|
26
27
|
|
|
@@ -28,16 +29,16 @@ module App
|
|
|
28
29
|
def start!
|
|
29
30
|
running!
|
|
30
31
|
Utility::Logger.info("Starting connector service in #{App::Config.native_mode ? 'native' : 'non-native'} mode...")
|
|
31
|
-
|
|
32
|
+
start_job_cleanup_task!
|
|
32
33
|
# start sync jobs consumer
|
|
33
34
|
start_consumer!
|
|
34
|
-
|
|
35
35
|
start_polling_jobs!
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
def shutdown!
|
|
39
39
|
Utility::Logger.info("Shutting down connector service with pool [#{pool.class}]...")
|
|
40
40
|
running.make_false
|
|
41
|
+
job_cleanup_timer.shutdown
|
|
41
42
|
scheduler.shutdown
|
|
42
43
|
pool.shutdown
|
|
43
44
|
pool.wait_for_termination(TERMINATION_TIMEOUT)
|
|
@@ -70,11 +71,25 @@ module App
|
|
|
70
71
|
end
|
|
71
72
|
end
|
|
72
73
|
|
|
74
|
+
def job_cleanup_timer
|
|
75
|
+
@job_cleanup_timer ||= Concurrent::TimerTask.new(:execution_interval => JOB_CLEANUP_INTERVAL, :run_now => true) do
|
|
76
|
+
connector_id = App::Config.native_mode ? nil : App::Config.connector_id
|
|
77
|
+
Core::JobCleanUp.execute(connector_id)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def start_job_cleanup_task!
|
|
82
|
+
job_cleanup_timer.execute
|
|
83
|
+
end
|
|
84
|
+
|
|
73
85
|
def start_polling_jobs!
|
|
74
86
|
scheduler.when_triggered do |connector_settings, task|
|
|
75
87
|
case task
|
|
76
88
|
when :sync
|
|
77
|
-
#
|
|
89
|
+
# TODO: #update_connector_sync_now should be moved to Core::ConnectorSettings,
|
|
90
|
+
# there should not be any business logic related code in Core::ElasticConnectorActions.
|
|
91
|
+
# #update_connector_sync_now should not update `last_synced` after https://github.com/elastic/enterprise-search-team/issues/3366 is resolved,
|
|
92
|
+
# schedule should not be based on `last_synced`
|
|
78
93
|
Core::ElasticConnectorActions.update_connector_sync_now(connector_settings.id, false)
|
|
79
94
|
|
|
80
95
|
Core::Jobs::Producer.enqueue_job(job_type: :sync, connector_settings: connector_settings)
|
|
@@ -9,9 +9,15 @@
|
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
|
10
10
|
require 'app/config'
|
|
11
11
|
require 'bson'
|
|
12
|
-
require 'connectors/base/advanced_snippet_validator'
|
|
13
12
|
require 'core/ingestion'
|
|
14
13
|
require 'connectors/tolerable_error_helper'
|
|
14
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
|
15
|
+
require 'core/filtering/simple_rules/validation/no_conflicting_policies_rules_validator'
|
|
16
|
+
require 'core/filtering/simple_rules/validation/single_rule_against_schema_validator'
|
|
17
|
+
require 'core/filtering/transform/filter_transformer_facade'
|
|
18
|
+
require 'core/filtering/transform/transformation_target'
|
|
19
|
+
require 'core/filtering/filter_validator'
|
|
20
|
+
require 'core/filtering/processing_stage'
|
|
15
21
|
require 'core/filtering/validation_status'
|
|
16
22
|
require 'utility'
|
|
17
23
|
require 'utility/filtering'
|
|
@@ -38,40 +44,55 @@ module Connectors
|
|
|
38
44
|
|
|
39
45
|
def self.kibana_features
|
|
40
46
|
[
|
|
41
|
-
|
|
42
|
-
|
|
47
|
+
{ :feature => :sync_rules, :subfeature => :basic, :enabled => true },
|
|
48
|
+
{ :feature => :sync_rules, :subfeature => :advanced, :enabled => true }
|
|
43
49
|
]
|
|
44
50
|
end
|
|
45
51
|
|
|
46
|
-
def self.
|
|
47
|
-
AdvancedSnippetValidator
|
|
52
|
+
def self.advanced_snippet_validators
|
|
53
|
+
Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
|
48
54
|
end
|
|
49
55
|
|
|
50
|
-
def self.
|
|
51
|
-
|
|
52
|
-
|
|
56
|
+
def self.simple_rules_validators
|
|
57
|
+
{
|
|
58
|
+
Core::Filtering::ProcessingStage::ALL => [
|
|
59
|
+
Core::Filtering::SimpleRules::Validation::SingleRuleAgainstSchemaValidator,
|
|
60
|
+
Core::Filtering::SimpleRules::Validation::NoConflictingPoliciesRulesValidator
|
|
61
|
+
]
|
|
62
|
+
}
|
|
63
|
+
end
|
|
53
64
|
|
|
54
|
-
|
|
55
|
-
|
|
65
|
+
def self.filter_transformers
|
|
66
|
+
{
|
|
67
|
+
Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [],
|
|
68
|
+
Core::Filtering::Transform::TransformationTarget::RULES => []
|
|
69
|
+
}
|
|
70
|
+
end
|
|
56
71
|
|
|
57
|
-
|
|
72
|
+
def self.validate_filtering(filtering = {})
|
|
73
|
+
filter = Utility::Filtering.extract_filter(filtering)
|
|
58
74
|
|
|
59
|
-
|
|
75
|
+
filter_validator = Core::Filtering::FilterValidator.new(snippet_validator_classes: advanced_snippet_validators,
|
|
76
|
+
rules_validator_classes: simple_rules_validators,
|
|
77
|
+
rules_pre_processing_active: Utility::Filtering.rule_pre_processing_active?(filter))
|
|
78
|
+
filter_validator.is_filter_valid(filter)
|
|
60
79
|
end
|
|
61
80
|
|
|
62
81
|
attr_reader :rules, :advanced_filter_config
|
|
63
82
|
|
|
64
|
-
def initialize(configuration: {}, job_description:
|
|
83
|
+
def initialize(configuration: {}, job_description: nil)
|
|
65
84
|
error_monitor = Utility::ErrorMonitor.new
|
|
66
85
|
@tolerable_error_helper = Connectors::TolerableErrorHelper.new(error_monitor)
|
|
67
86
|
|
|
68
|
-
@configuration = configuration
|
|
69
|
-
@job_description = job_description&.dup
|
|
87
|
+
@configuration = job_description&.configuration&.dup || configuration&.dup || {}
|
|
88
|
+
@job_description = job_description&.dup
|
|
70
89
|
|
|
71
|
-
|
|
90
|
+
filter = Utility::Filtering.extract_filter(@job_description&.filtering)
|
|
91
|
+
filter = Core::Filtering::Transform::FilterTransformerFacade.new(filter, self.class.filter_transformers).transform
|
|
72
92
|
|
|
73
|
-
@rules =
|
|
74
|
-
|
|
93
|
+
@rules = filter[:rules] || []
|
|
94
|
+
# regression bug, we need to keep indifferent access here until we get rid of symbols in the connectors
|
|
95
|
+
@advanced_filter_config = filter[:advanced_snippet]&.with_indifferent_access || {}
|
|
75
96
|
end
|
|
76
97
|
|
|
77
98
|
def yield_documents; end
|
|
@@ -100,10 +121,6 @@ module Connectors
|
|
|
100
121
|
false
|
|
101
122
|
end
|
|
102
123
|
|
|
103
|
-
def filtering_present?
|
|
104
|
-
@advanced_filter_config.present? && !@advanced_filter_config.empty? || @rules.present?
|
|
105
|
-
end
|
|
106
|
-
|
|
107
124
|
def metadata
|
|
108
125
|
{}
|
|
109
126
|
end
|
|
@@ -22,11 +22,47 @@ module Connectors
|
|
|
22
22
|
[]
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
def when_triggered
|
|
26
|
+
loop do
|
|
27
|
+
connector_settings.each do |cs|
|
|
28
|
+
# crawler only supports :sync
|
|
29
|
+
if sync_triggered?(cs)
|
|
30
|
+
yield cs, :sync, nil
|
|
31
|
+
next
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
schedule_key = custom_schedule_triggered(cs)
|
|
35
|
+
yield cs, :sync, schedule_key if schedule_key
|
|
36
|
+
end
|
|
37
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
|
38
|
+
log_authorization_error(e)
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
log_standard_error(e)
|
|
41
|
+
ensure
|
|
42
|
+
if @is_shutting_down
|
|
43
|
+
break
|
|
44
|
+
end
|
|
45
|
+
sleep_for_poll_interval
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
25
49
|
private
|
|
26
50
|
|
|
27
51
|
def connector_registered?(service_type)
|
|
28
52
|
service_type == 'elastic-crawler'
|
|
29
53
|
end
|
|
54
|
+
|
|
55
|
+
# custom scheduling has no ordering, so the first-found schedule is returned
|
|
56
|
+
def custom_schedule_triggered(cs)
|
|
57
|
+
cs.custom_scheduling_settings.each do |key, custom_scheduling|
|
|
58
|
+
identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
|
|
59
|
+
if schedule_triggered?(custom_scheduling, identifier)
|
|
60
|
+
return key
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
nil
|
|
65
|
+
end
|
|
30
66
|
end
|
|
31
67
|
end
|
|
32
68
|
end
|
|
@@ -36,7 +36,7 @@ module Connectors
|
|
|
36
36
|
}
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
def initialize(configuration: {}, job_description:
|
|
39
|
+
def initialize(configuration: {}, job_description: nil)
|
|
40
40
|
super
|
|
41
41
|
end
|
|
42
42
|
|
|
@@ -47,7 +47,7 @@ module Connectors
|
|
|
47
47
|
# raise 'something went wrong'
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
-
def self.
|
|
50
|
+
def self.advanced_snippet_validators
|
|
51
51
|
ExampleAdvancedSnippetValidator
|
|
52
52
|
end
|
|
53
53
|
|
|
@@ -6,13 +6,14 @@
|
|
|
6
6
|
|
|
7
7
|
# frozen_string_literal: true
|
|
8
8
|
|
|
9
|
-
require '
|
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
|
10
|
+
require 'core/filtering/validation_status'
|
|
10
11
|
|
|
11
12
|
module Connectors
|
|
12
13
|
module Example
|
|
13
|
-
class ExampleAdvancedSnippetValidator <
|
|
14
|
+
class ExampleAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
|
14
15
|
|
|
15
|
-
def is_snippet_valid
|
|
16
|
+
def is_snippet_valid
|
|
16
17
|
# TODO: real filtering validation will follow later
|
|
17
18
|
errors = [
|
|
18
19
|
{
|
|
@@ -37,16 +37,16 @@ module Connectors
|
|
|
37
37
|
}
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
def self.
|
|
40
|
+
def self.advanced_snippet_validators
|
|
41
41
|
GitLabAdvancedSnippetValidator
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
def initialize(configuration: {}, job_description:
|
|
44
|
+
def initialize(configuration: {}, job_description: nil)
|
|
45
45
|
super
|
|
46
46
|
|
|
47
47
|
@extractor = Connectors::GitLab::Extractor.new(
|
|
48
|
-
:base_url => configuration.dig(:base_url, :value),
|
|
49
|
-
:api_token => configuration.dig(:api_token, :value)
|
|
48
|
+
:base_url => @configuration.dig(:base_url, :value),
|
|
49
|
+
:api_token => @configuration.dig(:api_token, :value)
|
|
50
50
|
)
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
|
|
7
7
|
# frozen_string_literal: true
|
|
8
8
|
|
|
9
|
-
require '
|
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_validator'
|
|
10
10
|
|
|
11
11
|
module Connectors
|
|
12
12
|
module GitLab
|
|
13
|
-
class GitLabAdvancedSnippetValidator <
|
|
13
|
+
class GitLabAdvancedSnippetValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetValidator
|
|
14
14
|
|
|
15
|
-
def is_snippet_valid
|
|
15
|
+
def is_snippet_valid
|
|
16
16
|
# TODO: real filtering validation will follow later
|
|
17
17
|
errors = [
|
|
18
18
|
{
|
|
@@ -21,13 +21,11 @@ module Connectors
|
|
|
21
21
|
}
|
|
22
22
|
]
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
log_validation_result(validation_result)
|
|
30
|
-
validation_result
|
|
24
|
+
if @advanced_snippet.present? && !@advanced_snippet.empty?
|
|
25
|
+
{ :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
|
|
26
|
+
else
|
|
27
|
+
{ :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
|
|
28
|
+
end
|
|
31
29
|
end
|
|
32
30
|
|
|
33
31
|
end
|
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module Connectors
|
|
10
|
+
class JobTriggerMethod
|
|
11
|
+
ON_DEMAND = 'on_demand'
|
|
12
|
+
SCHEDULED = 'scheduled'
|
|
12
13
|
end
|
|
13
14
|
end
|
|
@@ -8,7 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
require 'connectors/base/connector'
|
|
10
10
|
require 'core/filtering/validation_status'
|
|
11
|
-
require '
|
|
11
|
+
require 'core/filtering/transform/transformation_target'
|
|
12
|
+
require 'connectors/mongodb/mongo_advanced_snippet_snake_case_transformer'
|
|
12
13
|
require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
|
|
13
14
|
require 'mongo'
|
|
14
15
|
require 'utility'
|
|
@@ -19,6 +20,9 @@ module Connectors
|
|
|
19
20
|
|
|
20
21
|
ALLOWED_TOP_LEVEL_FILTER_KEYS = %w[find aggregate]
|
|
21
22
|
|
|
23
|
+
AGGREGATE = 'aggregate'
|
|
24
|
+
FIND = 'find'
|
|
25
|
+
|
|
22
26
|
PAGE_SIZE = 100
|
|
23
27
|
|
|
24
28
|
def self.service_type
|
|
@@ -52,19 +56,25 @@ module Connectors
|
|
|
52
56
|
}
|
|
53
57
|
end
|
|
54
58
|
|
|
55
|
-
def self.
|
|
59
|
+
def self.advanced_snippet_validators
|
|
56
60
|
MongoAdvancedSnippetAgainstSchemaValidator
|
|
57
61
|
end
|
|
58
62
|
|
|
59
|
-
def
|
|
63
|
+
def self.filter_transformers
|
|
64
|
+
{
|
|
65
|
+
Core::Filtering::Transform::TransformationTarget::ADVANCED_SNIPPET => [MongoAdvancedSnippetSnakeCaseTransformer]
|
|
66
|
+
}
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def initialize(configuration: {}, job_description: nil)
|
|
60
70
|
super
|
|
61
71
|
|
|
62
|
-
@host = configuration.dig(:host, :value)
|
|
63
|
-
@database = configuration.dig(:database, :value)
|
|
64
|
-
@collection = configuration.dig(:collection, :value)
|
|
65
|
-
@user = configuration.dig(:user, :value)
|
|
66
|
-
@password = configuration.dig(:password, :value)
|
|
67
|
-
@direct_connection = configuration.dig(:direct_connection, :value)
|
|
72
|
+
@host = @configuration.dig(:host, :value)
|
|
73
|
+
@database = @configuration.dig(:database, :value)
|
|
74
|
+
@collection = @configuration.dig(:collection, :value)
|
|
75
|
+
@user = @configuration.dig(:user, :value)
|
|
76
|
+
@password = @configuration.dig(:password, :value)
|
|
77
|
+
@direct_connection = @configuration.dig(:direct_connection, :value)
|
|
68
78
|
end
|
|
69
79
|
|
|
70
80
|
def yield_documents
|
|
@@ -73,44 +83,56 @@ module Connectors
|
|
|
73
83
|
# This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
|
|
74
84
|
# It's done due to the fact that usage of .find.each leads to memory leaks or overuse of memory - the whole result set seems to stay in memory
|
|
75
85
|
# during the sync. Sometimes (not 100% sure) it even leads to a real leak, when the memory for these objects is never recycled.
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
86
|
+
cursor_type, cursor_with_options = create_db_cursor_on_collection(client[@collection])
|
|
87
|
+
cursor, options = cursor_with_options
|
|
88
|
+
|
|
89
|
+
case cursor_type
|
|
90
|
+
when FIND
|
|
91
|
+
skip = 0
|
|
92
|
+
found_overall = 0
|
|
93
|
+
|
|
94
|
+
# if no overall limit is specified by filtering, use Float::INFINITY so ingestion is never cut short (found_overall is only increased,
|
|
95
|
+
# thus can never reach infinity)
|
|
96
|
+
overall_limit = Float::INFINITY
|
|
97
|
+
|
|
98
|
+
if options.present?
|
|
99
|
+
# there could be a skip parameter defined for filtering
|
|
100
|
+
skip = options.fetch(:skip, skip)
|
|
101
|
+
# there could be a limit parameter defined for filtering -> used for an overall limit (not a page limit, which was introduced for memory optimization)
|
|
102
|
+
overall_limit = options.fetch(:limit, overall_limit)
|
|
103
|
+
end
|
|
84
104
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
105
|
+
overall_limit_reached = false
|
|
106
|
+
|
|
107
|
+
loop do
|
|
108
|
+
found_in_page = 0
|
|
109
|
+
|
|
110
|
+
Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
|
|
111
|
+
view = cursor.skip(skip).limit(PAGE_SIZE)
|
|
112
|
+
view.each do |document|
|
|
113
|
+
yield_with_handling_tolerable_errors do
|
|
114
|
+
yield serialize(document)
|
|
115
|
+
found_in_page += 1
|
|
116
|
+
found_overall += 1
|
|
117
|
+
overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
|
|
118
|
+
end
|
|
119
|
+
break if overall_limit_reached
|
|
120
|
+
end
|
|
91
121
|
|
|
92
|
-
|
|
122
|
+
page_was_empty = found_in_page == 0
|
|
93
123
|
|
|
94
|
-
|
|
95
|
-
found_in_page = 0
|
|
124
|
+
break if page_was_empty || overall_limit_reached
|
|
96
125
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
126
|
+
skip += PAGE_SIZE
|
|
127
|
+
end
|
|
128
|
+
when AGGREGATE
|
|
129
|
+
cursor.each do |document|
|
|
100
130
|
yield_with_handling_tolerable_errors do
|
|
101
131
|
yield serialize(document)
|
|
102
|
-
found_in_page += 1
|
|
103
|
-
found_overall += 1
|
|
104
|
-
overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
|
|
105
132
|
end
|
|
106
|
-
break if overall_limit_reached
|
|
107
133
|
end
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
break if page_was_empty || overall_limit_reached
|
|
112
|
-
|
|
113
|
-
skip += PAGE_SIZE
|
|
134
|
+
else
|
|
135
|
+
raise "Unknown retrieval function #{cursor_type} for MongoDB."
|
|
114
136
|
end
|
|
115
137
|
end
|
|
116
138
|
end
|
|
@@ -118,22 +140,20 @@ module Connectors
|
|
|
118
140
|
private
|
|
119
141
|
|
|
120
142
|
def create_db_cursor_on_collection(collection)
|
|
121
|
-
return
|
|
143
|
+
return [AGGREGATE, create_aggregate_cursor(collection)] if @advanced_filter_config[:aggregate].present?
|
|
122
144
|
|
|
123
|
-
return
|
|
145
|
+
return [FIND, create_find_cursor(collection)] if @advanced_filter_config[:find].present?
|
|
124
146
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
collection.find
|
|
147
|
+
[FIND, collection.find]
|
|
128
148
|
end
|
|
129
149
|
|
|
130
150
|
def create_aggregate_cursor(collection)
|
|
131
151
|
aggregate = @advanced_filter_config[:aggregate]
|
|
132
152
|
|
|
133
|
-
pipeline = aggregate[:pipeline]
|
|
153
|
+
pipeline = aggregate[:pipeline] || []
|
|
134
154
|
options = extract_options(aggregate)
|
|
135
155
|
|
|
136
|
-
if
|
|
156
|
+
if pipeline.empty? && options.empty?
|
|
137
157
|
Utility::Logger.warn('\'Aggregate\' was specified with an empty pipeline and empty options.')
|
|
138
158
|
end
|
|
139
159
|
|
|
@@ -153,16 +173,6 @@ module Connectors
|
|
|
153
173
|
[collection.find(filter, options), options]
|
|
154
174
|
end
|
|
155
175
|
|
|
156
|
-
def create_simple_rules_cursor(collection)
|
|
157
|
-
filter = {}
|
|
158
|
-
if @rules.present?
|
|
159
|
-
parser = MongoRulesParser.new(@rules)
|
|
160
|
-
filter = parser.parse
|
|
161
|
-
end
|
|
162
|
-
Utility::Logger.info("Filtering with simple rules filter: #{filter}")
|
|
163
|
-
filter.present? ? collection.find(filter) : collection.find
|
|
164
|
-
end
|
|
165
|
-
|
|
166
176
|
def extract_options(mongodb_function)
|
|
167
177
|
mongodb_function[:options].present? ? mongodb_function[:options] : {}
|
|
168
178
|
end
|
|
@@ -6,12 +6,12 @@
|
|
|
6
6
|
|
|
7
7
|
# frozen_string_literal: true
|
|
8
8
|
|
|
9
|
-
require '
|
|
9
|
+
require 'core/filtering/advanced_snippet/advanced_snippet_against_schema_validator'
|
|
10
10
|
require 'connectors/mongodb/mongo_advanced_snippet_schema'
|
|
11
11
|
|
|
12
12
|
module Connectors
|
|
13
13
|
module MongoDB
|
|
14
|
-
class MongoAdvancedSnippetAgainstSchemaValidator <
|
|
14
|
+
class MongoAdvancedSnippetAgainstSchemaValidator < Core::Filtering::AdvancedSnippet::AdvancedSnippetAgainstSchemaValidator
|
|
15
15
|
|
|
16
16
|
def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
|
|
17
17
|
super
|
|
@@ -25,7 +25,7 @@ module Connectors
|
|
|
25
25
|
NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
|
|
26
26
|
READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
|
|
27
27
|
STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
|
|
28
|
-
MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
|
|
28
|
+
MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.nil? || fields.size <= 1 }
|
|
29
29
|
|
|
30
30
|
AGGREGATION_PIPELINE = lambda { |pipeline|
|
|
31
31
|
return false unless pipeline.is_a?(Array)
|
|
@@ -265,7 +265,8 @@ module Connectors
|
|
|
265
265
|
|
|
266
266
|
FIND_FILTER = {
|
|
267
267
|
:name => 'filter',
|
|
268
|
-
:type => FILTER
|
|
268
|
+
:type => FILTER,
|
|
269
|
+
:optional => true
|
|
269
270
|
}
|
|
270
271
|
|
|
271
272
|
FIND = {
|