connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -10
- data/lib/app/config.rb +2 -0
- data/lib/app/dispatcher.rb +17 -1
- data/lib/app/preflight_check.rb +15 -0
- data/lib/connectors/base/connector.rb +37 -4
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
- data/lib/connectors/example/connector.rb +43 -4
- data/lib/connectors/gitlab/connector.rb +16 -2
- data/lib/connectors/mongodb/connector.rb +173 -50
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/registry.rb +2 -2
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/configuration.rb +4 -2
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +29 -18
- data/lib/core/elastic_connector_actions.rb +331 -32
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/native_scheduler.rb +3 -0
- data/lib/core/scheduler.rb +43 -10
- data/lib/core/single_scheduler.rb +3 -0
- data/lib/core/sync_job_runner.rb +78 -18
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -3
- metadata +27 -18
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/object'
|
10
|
+
require 'connectors/base/simple_rules_parser'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
|
13
|
+
module Connectors
|
14
|
+
module MongoDB
|
15
|
+
class MongoRulesParser < Connectors::Base::SimpleRulesParser
|
16
|
+
# Translates one simple filtering rule into a MongoDB query fragment.
#
# The rule's operator (+rule.rule+) selects which private parse_* builder
# produces the fragment; the rule itself is handed to that builder unchanged.
#
# @param rule [#field, #value, #rule] the rule to translate
# @return [Object] whatever the selected parse_* builder returns
# @raise [RuntimeError] if the field or the value is blank, or the operator is not recognised
def parse_rule(rule)
  field = rule.field
  value = rule.value

  # Check the field before the value: the "value is required" message
  # interpolates the field name, so it must be known to be present by then.
  raise "field is required for rule: #{rule}" unless field.present?
  raise "value is required for field: #{field}" unless value.present?

  op = rule.rule
  case op
  when Core::Filtering::SimpleRule::Rule::EQUALS
    parse_equals(rule)
  when Core::Filtering::SimpleRule::Rule::GREATER_THAN
    parse_greater_than(rule)
  when Core::Filtering::SimpleRule::Rule::LESS_THAN
    parse_less_than(rule)
  when Core::Filtering::SimpleRule::Rule::REGEX
    parse_regex(rule)
  else
    raise "Unknown operator: #{op}"
  end
end
|
39
|
+
|
40
|
+
# Combines already-parsed rule fragments into a single query.
#
# @param rules [Array] parsed rule fragments
# @return [Object] an empty hash for no rules, the lone fragment for exactly
#   one rule, otherwise a hash wrapping all fragments under '$and'
def merge_rules(rules)
  case rules.size
  when 0 then {}
  when 1 then rules.first
  else { '$and' => rules }
  end
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
# Builds the fragment for an "equals" rule.
#
# @param rule [#field, #value, #is_include?]
# @return [Hash] field mapped to the value when the rule is an include,
#   field mapped to a '$ne' condition on the value otherwise
def parse_equals(rule)
  return { rule.field => rule.value } if rule.is_include?

  { rule.field => { '$ne' => rule.value } }
end
|
55
|
+
|
56
|
+
# Builds the fragment for a "greater than" rule.
#
# @param rule [#field, #value, #is_include?]
# @return [Hash] field mapped to a '$gt' condition when the rule is an
#   include, or to the complementary '$lte' condition otherwise
def parse_greater_than(rule)
  operator = rule.is_include? ? '$gt' : '$lte'
  { rule.field => { operator => rule.value } }
end
|
63
|
+
|
64
|
+
# Builds the fragment for a "less than" rule.
#
# @param rule [#field, #value, #is_include?]
# @return [Hash] field mapped to a '$lt' condition when the rule is an
#   include, or to the complementary '$gte' condition otherwise
def parse_less_than(rule)
  operator = rule.is_include? ? '$lt' : '$gte'
  { rule.field => { operator => rule.value } }
end
|
71
|
+
|
72
|
+
# Builds the fragment for a "regex" rule.
#
# The rule's value is interpolated into a Regexp literal as-is, so it is
# treated as a pattern, not as literal text.
#
# @param rule [#field, #value, #is_include?]
# @return [Hash] field mapped to the Regexp when the rule is an include,
#   or to a '$not' wrapper around the Regexp otherwise
def parse_regex(rule)
  pattern = /#{rule.value}/
  condition = rule.is_include? ? pattern : { '$not' => pattern }
  { rule.field => condition }
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/connectors/registry.rb
CHANGED
@@ -24,10 +24,10 @@ module Connectors
|
|
24
24
|
@connectors[name]
|
25
25
|
end
|
26
26
|
|
27
|
-
# Instantiates the connector class registered under +name+.
#
# @param name the key the connector class is looked up by (via connector_class)
# @param configuration passed through to the connector's constructor
# @param job_description passed through to the connector's constructor; defaults to an empty hash
# @return a new instance of the registered connector class
# @raise [RuntimeError] when no class is registered under +name+
def connector(name, configuration, job_description: {})
  klass = connector_class(name)
  raise "Connector #{name} is not yet registered. You need to register it before use" unless klass.present?

  klass.new(configuration: configuration, job_description: job_description)
end
|
@@ -8,14 +8,33 @@
|
|
8
8
|
|
9
9
|
module Connectors
  # Sync status string constants and the groupings built from them.
  class SyncStatus
    PENDING = 'pending'
    IN_PROGRESS = 'in_progress'
    CANCELING = 'canceling'
    CANCELED = 'canceled'
    SUSPENDED = 'suspended'
    COMPLETED = 'completed'
    ERROR = 'error'

    # Every status value defined above.
    STATUSES = [
      PENDING,
      IN_PROGRESS,
      CANCELING,
      CANCELED,
      SUSPENDED,
      COMPLETED,
      ERROR
    ].freeze

    PENDING_STATUSES = [
      PENDING,
      SUSPENDED
    ].freeze

    # Original, misspelled name of PENDING_STATUSES. Kept as an alias so
    # existing references to it keep resolving.
    PENDING_STATUES = PENDING_STATUSES

    TERMINAL_STATUSES = [
      CANCELED,
      COMPLETED,
      ERROR
    ].freeze
  end
end
|
data/lib/core/configuration.rb
CHANGED
@@ -23,9 +23,11 @@ module Core
|
|
23
23
|
Utility::Logger.error("Couldn't find connector for service type #{connector_settings.service_type || service_type}")
|
24
24
|
return
|
25
25
|
end
|
26
|
-
configuration = connector_class.
|
26
|
+
configuration = connector_class.configurable_fields_indifferent_access
|
27
|
+
features = connector_class.kibana_features.each_with_object({}) { |feature, hsh| hsh[feature] = true }
|
27
28
|
doc = {
|
28
|
-
:configuration => configuration
|
29
|
+
:configuration => configuration,
|
30
|
+
:features => features
|
29
31
|
}
|
30
32
|
|
31
33
|
doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?
|
@@ -0,0 +1,137 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/sync_status'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Wraps a connector job document as returned by ElasticConnectorActions and
  # exposes its id, its status and the connector snapshot stored on it.
  class ConnectorJob
    DEFAULT_PAGE_SIZE = 100

    # Error Classes
    class ConnectorJobNotFoundError < StandardError; end

    # Loads a single job by id.
    #
    # @param job_id id handed to ElasticConnectorActions.get_job
    # @return [ConnectorJob]
    # @raise [ConnectorJobNotFoundError] when the response's :found flag is falsy
    def self.fetch_by_id(job_id)
      es_response = ElasticConnectorActions.get_job(job_id)

      raise ConnectorJobNotFoundError, "Connector job with id=#{job_id} was not found." unless es_response[:found]

      new(es_response)
    end

    # Loads every job whose status is one of Connectors::SyncStatus::PENDING_STATUES.
    #
    # @param page_size [Integer] number of jobs requested per search call
    # @return [Array<ConnectorJob>]
    def self.pending_jobs(page_size = DEFAULT_PAGE_SIZE)
      query = { terms: { status: Connectors::SyncStatus::PENDING_STATUES } }
      fetch_jobs_by_query(query, page_size)
    end

    # Placeholder: currently always returns an empty array.
    def self.orphaned_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently always returns an empty array.
    def self.stuck_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently does nothing and returns nil.
    def self.enqueue(_connector_id)
      nil
    end

    # @return the :_id of the wrapped Elasticsearch response
    def id
      @elasticsearch_response[:_id]
    end

    # Reads a property from the job document's :_source.
    #
    # @param property_name key to look up
    def [](property_name)
      @elasticsearch_response[:_source][property_name]
    end

    def status
      self[:status]
    end

    def in_progress?
      status == Connectors::SyncStatus::IN_PROGRESS
    end

    def canceling?
      status == Connectors::SyncStatus::CANCELING
    end

    # @return the :connector entry of the job document
    def connector_snapshot
      self[:connector]
    end

    def connector_id
      connector_snapshot[:id]
    end

    # Reads the snapshot's :index_name. This previously returned the
    # :configuration entry, duplicating #configuration.
    # NOTE(review): assumes the snapshot stores the index under :index_name - confirm against the job document schema.
    def index_name
      connector_snapshot[:index_name]
    end

    def language
      connector_snapshot[:language]
    end

    def service_type
      connector_snapshot[:service_type]
    end

    def configuration
      connector_snapshot[:configuration]
    end

    # @return the result of Utility::Filtering.extract_filter applied to the snapshot's :filtering entry
    def filtering
      Utility::Filtering.extract_filter(connector_snapshot[:filtering])
    end

    def pipeline
      connector_snapshot[:pipeline]
    end

    # Memoised lookup of the connector this job refers to, via ConnectorSettings.fetch_by_id.
    def connector
      @connector ||= ConnectorSettings.fetch_by_id(connector_id)
    end

    # Drops the memoised connector and fetches it again.
    def reload_connector!
      @connector = nil
      connector
    end

    # Re-fetches the job document and drops the memoised connector.
    #
    # @return [nil]
    # @raise [ConnectorJobNotFoundError] when the response's :found flag is falsy
    def reload
      es_response = ElasticConnectorActions.get_job(id)
      raise ConnectorJobNotFoundError, "Connector job with id=#{id} was not found." unless es_response[:found]

      # TODO: remove the usage of with_indifferent_access. get_job method is expected to return a hash
      @elasticsearch_response = es_response.with_indifferent_access
      @connector = nil
    end

    private

    def initialize(es_response)
      # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
      @elasticsearch_response = es_response.with_indifferent_access
    end

    # Pages through ElasticConnectorActions.search_jobs and wraps every hit.
    #
    # NOTE: the bare `private` above does not apply to `def self.` methods, so
    # this stays callable from outside the class, exactly as before.
    #
    # @param query search query handed to search_jobs
    # @param page_size [Integer] number of jobs requested per search call
    # @return [Array<ConnectorJob>]
    def self.fetch_jobs_by_query(query, page_size)
      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_jobs(query, page_size, offset)

        hits = response.dig('hits', 'hits') || []
        total = response.dig('hits', 'total', 'value') || 0
        results.concat(hits.map { |hit| new(hit) })
        # An empty page cannot advance the offset; stop instead of requesting
        # the same page forever when the reported total is never reached.
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end
  end
end
|
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/registry'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -19,6 +20,8 @@ module Core
|
|
19
20
|
DEFAULT_REDUCE_WHITESPACE = true
|
20
21
|
DEFAULT_RUN_ML_INFERENCE = true
|
21
22
|
|
23
|
+
DEFAULT_FILTERING = {}
|
24
|
+
|
22
25
|
DEFAULT_PAGE_SIZE = 100
|
23
26
|
|
24
27
|
# Error Classes
|
@@ -32,13 +35,15 @@ module Core
|
|
32
35
|
new(es_response, connectors_meta)
|
33
36
|
end
|
34
37
|
|
35
|
-
def initialize(es_response, connectors_meta)
|
36
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
37
|
-
@connectors_meta = connectors_meta.with_indifferent_access
|
38
|
-
end
|
39
|
-
|
40
38
|
# Fetches connectors that are flagged native and whose service type is one of
# the connectors returned by Connectors::REGISTRY.registered_connectors.
#
# @param page_size [Integer] forwarded to fetch_connectors_by_query
# @return whatever fetch_connectors_by_query returns for the query
def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
  native_only = { term: { is_native: true } }
  registered_only = { terms: { service_type: Connectors::REGISTRY.registered_connectors } }

  fetch_connectors_by_query({ bool: { filter: [native_only, registered_only] } }, page_size)
end
|
44
49
|
|
@@ -80,20 +85,27 @@ module Core
|
|
80
85
|
self[:scheduling]
|
81
86
|
end
|
82
87
|
|
88
|
+
# Returns the connector's filtering as resolved by Utility::Filtering.extract_filter.
def filtering
  # For now a single filtering object is assumed: either the first element of
  # the filtering array or the filter object itself.
  Utility::Filtering.extract_filter(@elasticsearch_response.dig(:_source, :filtering))
end
|
94
|
+
|
83
95
|
# Resolves the request pipeline name by handing three candidates, in priority
# order, to Utility::Common.return_if_present: the connector document's own
# pipeline name, the default from the connectors meta, and the built-in default.
def request_pipeline
  from_connector = @elasticsearch_response.dig(:_source, :pipeline, :name)
  from_meta = @connectors_meta.dig(:pipeline, :default_name)

  Utility::Common.return_if_present(from_connector, from_meta, DEFAULT_REQUEST_PIPELINE)
end
|
86
98
|
|
87
99
|
# Resolves the extract-binary-content setting by handing three candidates, in
# priority order, to Utility::Common.return_if_present: the connector
# document's value, the connectors meta default, and the built-in default.
def extract_binary_content?
  from_connector = @elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content)
  from_meta = @connectors_meta.dig(:pipeline, :default_extract_binary_content)

  Utility::Common.return_if_present(from_connector, from_meta, DEFAULT_EXTRACT_BINARY_CONTENT)
end
|
90
102
|
|
91
103
|
# Resolves the reduce-whitespace setting by handing three candidates, in
# priority order, to Utility::Common.return_if_present: the connector
# document's value, the connectors meta default, and the built-in default.
def reduce_whitespace?
  from_connector = @elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace)
  from_meta = @connectors_meta.dig(:pipeline, :default_reduce_whitespace)

  Utility::Common.return_if_present(from_connector, from_meta, DEFAULT_REDUCE_WHITESPACE)
end
|
94
106
|
|
95
107
|
# Resolves the run-ML-inference setting by handing three candidates, in
# priority order, to Utility::Common.return_if_present: the connector
# document's value, the connectors meta default, and the built-in default.
def run_ml_inference?
  from_connector = @elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference)
  from_meta = @connectors_meta.dig(:pipeline, :default_run_ml_inference)

  Utility::Common.return_if_present(from_connector, from_meta, DEFAULT_RUN_ML_INFERENCE)
end
|
98
110
|
|
99
111
|
def formatted
|
@@ -112,6 +124,11 @@ module Core
|
|
112
124
|
|
113
125
|
private
|
114
126
|
|
127
|
+
# Stores the connector document and the connectors meta, each converted with
# with_indifferent_access.
#
# @param es_response the connector document to wrap
# @param connectors_meta the connectors meta used for pipeline defaults
def initialize(es_response, connectors_meta)
  @elasticsearch_response, @connectors_meta =
    [es_response, connectors_meta].map(&:with_indifferent_access)
end
|
131
|
+
|
115
132
|
def self.fetch_connectors_by_query(query, page_size)
|
116
133
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
117
134
|
|
@@ -120,8 +137,8 @@ module Core
|
|
120
137
|
loop do
|
121
138
|
response = ElasticConnectorActions.search_connectors(query, page_size, offset)
|
122
139
|
|
123
|
-
hits = response
|
124
|
-
total = response
|
140
|
+
hits = response.dig('hits', 'hits') || []
|
141
|
+
total = response.dig('hits', 'total', 'value') || 0
|
125
142
|
results += hits.map do |hit|
|
126
143
|
Core::ConnectorSettings.new(hit, connectors_meta)
|
127
144
|
end
|
@@ -132,11 +149,5 @@ module Core
|
|
132
149
|
results
|
133
150
|
end
|
134
151
|
|
135
|
-
def return_if_present(*args)
|
136
|
-
args.each do |arg|
|
137
|
-
return arg unless arg.nil?
|
138
|
-
end
|
139
|
-
nil
|
140
|
-
end
|
141
152
|
end
|
142
153
|
end
|