connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -10
- data/lib/app/config.rb +2 -0
- data/lib/app/dispatcher.rb +17 -1
- data/lib/app/preflight_check.rb +15 -0
- data/lib/connectors/base/connector.rb +37 -4
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
- data/lib/connectors/example/connector.rb +43 -4
- data/lib/connectors/gitlab/connector.rb +16 -2
- data/lib/connectors/mongodb/connector.rb +173 -50
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/registry.rb +2 -2
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/configuration.rb +4 -2
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +29 -18
- data/lib/core/elastic_connector_actions.rb +331 -32
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/native_scheduler.rb +3 -0
- data/lib/core/scheduler.rb +43 -10
- data/lib/core/single_scheduler.rb +3 -0
- data/lib/core/sync_job_runner.rb +78 -18
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -3
- metadata +27 -18
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/object'
|
10
|
+
require 'connectors/base/simple_rules_parser'
|
11
|
+
require 'core/filtering/simple_rule'
|
12
|
+
|
13
|
+
module Connectors
  module MongoDB
    # Turns simple filtering rules into MongoDB filter hashes.
    #
    # Each rule is expected to respond to +field+, +value+, +rule+ (the operator)
    # and +is_include?+. An "include" rule produces a filter that matches the
    # condition; any other rule produces the negated filter.
    class MongoRulesParser < Connectors::Base::SimpleRulesParser
      # Builds the filter hash for a single rule.
      #
      # Raises a RuntimeError when the rule has no value, has no field, or uses
      # an operator other than EQUALS, GREATER_THAN, LESS_THAN or REGEX.
      def parse_rule(rule)
        field = rule.field
        value = rule.value

        # The value is validated before the field, so a rule missing both
        # reports the missing value.
        raise "value is required for field: #{field}" unless value.present?
        raise "field is required for rule: #{rule}" unless field.present?

        operator = rule.rule
        case operator
        when Core::Filtering::SimpleRule::Rule::EQUALS then parse_equals(rule)
        when Core::Filtering::SimpleRule::Rule::GREATER_THAN then parse_greater_than(rule)
        when Core::Filtering::SimpleRule::Rule::LESS_THAN then parse_less_than(rule)
        when Core::Filtering::SimpleRule::Rule::REGEX then parse_regex(rule)
        else
          raise "Unknown operator: #{operator}"
        end
      end

      # Combines already-parsed filters into one filter:
      # no filters => {}, a single filter => that filter, several => an '$and' of all of them.
      def merge_rules(rules)
        return {} if rules.empty?

        rules.size == 1 ? rules[0] : { '$and' => rules }
      end

      private

      # Include: plain equality match. Otherwise: '$ne'.
      def parse_equals(rule)
        return { rule.field => rule.value } if rule.is_include?

        { rule.field => { '$ne' => rule.value } }
      end

      # Include: '$gt'. Otherwise the complement, '$lte'.
      def parse_greater_than(rule)
        comparison = rule.is_include? ? '$gt' : '$lte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: '$lt'. Otherwise the complement, '$gte'.
      def parse_less_than(rule)
        comparison = rule.is_include? ? '$lt' : '$gte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: match the field against a Regexp built from the rule value.
      # Otherwise: wrap the same Regexp in '$not'.
      def parse_regex(rule)
        return { rule.field => /#{rule.value}/ } if rule.is_include?

        { rule.field => { '$not' => /#{rule.value}/ } }
      end
    end
  end
end
|
data/lib/connectors/registry.rb
CHANGED
@@ -24,10 +24,10 @@ module Connectors
|
|
24
24
|
@connectors[name]
|
25
25
|
end
|
26
26
|
|
27
|
-
def connector(name, configuration)
|
27
|
+
def connector(name, configuration, job_description: {})
|
28
28
|
klass = connector_class(name)
|
29
29
|
if klass.present?
|
30
|
-
return klass.new(configuration: configuration)
|
30
|
+
return klass.new(configuration: configuration, job_description: job_description)
|
31
31
|
end
|
32
32
|
raise "Connector #{name} is not yet registered. You need to register it before use"
|
33
33
|
end
|
@@ -8,14 +8,33 @@
|
|
8
8
|
|
9
9
|
module Connectors
  # String values used for a sync job's status, plus groupings of those values.
  class SyncStatus
    PENDING = 'pending'
    IN_PROGRESS = 'in_progress'
    CANCELING = 'canceling'
    CANCELED = 'canceled'
    SUSPENDED = 'suspended'
    COMPLETED = 'completed'
    ERROR = 'error'

    # Every status value defined above.
    STATUSES = [
      PENDING,
      IN_PROGRESS,
      CANCELING,
      CANCELED,
      SUSPENDED,
      COMPLETED,
      ERROR
    ].freeze

    # Statuses grouped as "pending": PENDING and SUSPENDED.
    PENDING_STATUSES = [
      PENDING,
      SUSPENDED
    ].freeze

    # Misspelled name the constant was originally published under; kept as an
    # alias so existing references keep resolving.
    PENDING_STATUES = PENDING_STATUSES

    # Statuses grouped as "terminal": CANCELED, COMPLETED and ERROR.
    TERMINAL_STATUSES = [
      CANCELED,
      COMPLETED,
      ERROR
    ].freeze
  end
end
|
data/lib/core/configuration.rb
CHANGED
@@ -23,9 +23,11 @@ module Core
|
|
23
23
|
Utility::Logger.error("Couldn't find connector for service type #{connector_settings.service_type || service_type}")
|
24
24
|
return
|
25
25
|
end
|
26
|
-
configuration = connector_class.
|
26
|
+
configuration = connector_class.configurable_fields_indifferent_access
|
27
|
+
features = connector_class.kibana_features.each_with_object({}) { |feature, hsh| hsh[feature] = true }
|
27
28
|
doc = {
|
28
|
-
:configuration => configuration
|
29
|
+
:configuration => configuration,
|
30
|
+
:features => features
|
29
31
|
}
|
30
32
|
|
31
33
|
doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?
|
@@ -0,0 +1,137 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/sync_status'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Wrapper around a connector sync job document returned by Elasticsearch.
  #
  # An instance holds the raw response (a GET response or a search hit) and
  # exposes the job id, the job status and the connector snapshot stored under
  # the +connector+ key of the document's +_source+.
  class ConnectorJob
    DEFAULT_PAGE_SIZE = 100

    # Error Classes
    class ConnectorJobNotFoundError < StandardError; end

    # Loads one job by id.
    #
    # @param job_id id passed to ElasticConnectorActions.get_job
    # @return [ConnectorJob]
    # @raise [ConnectorJobNotFoundError] when the response's +:found+ entry is falsy
    def self.fetch_by_id(job_id)
      es_response = ElasticConnectorActions.get_job(job_id)

      raise ConnectorJobNotFoundError, "Connector job with id=#{job_id} was not found." unless es_response[:found]

      new(es_response)
    end

    # Returns every job whose status is one of Connectors::SyncStatus::PENDING_STATUES.
    #
    # @param page_size [Integer] number of jobs requested per search call
    # @return [Array<ConnectorJob>]
    def self.pending_jobs(page_size = DEFAULT_PAGE_SIZE)
      query = { terms: { status: Connectors::SyncStatus::PENDING_STATUES } }
      fetch_jobs_by_query(query, page_size)
    end

    # Placeholder: currently always returns an empty list.
    def self.orphaned_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently always returns an empty list.
    def self.stuck_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently does nothing and returns nil.
    def self.enqueue(_connector_id)
      nil
    end

    # Pages through ElasticConnectorActions.search_jobs and wraps every hit.
    #
    # This method was originally written below the +private+ keyword, which has
    # no effect on +def self.+ methods, so it has always been publicly callable;
    # that visibility is kept here.
    #
    # @param query [Hash] query forwarded to search_jobs
    # @param page_size [Integer] page size forwarded to search_jobs
    # @return [Array<ConnectorJob>]
    def self.fetch_jobs_by_query(query, page_size)
      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_jobs(query, page_size, offset)

        hits = response.dig('hits', 'hits') || []
        total = response.dig('hits', 'total', 'value') || 0
        results.concat(hits.map { |hit| new(hit) })

        # Stop on an empty page as well: if the reported total is larger than
        # what can actually be read, the offset would never advance and the
        # loop would never end.
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    # @return the +_id+ of the wrapped response
    def id
      @elasticsearch_response[:_id]
    end

    # Reads a top-level property from the document's +_source+.
    def [](property_name)
      @elasticsearch_response[:_source][property_name]
    end

    def status
      self[:status]
    end

    def in_progress?
      status == Connectors::SyncStatus::IN_PROGRESS
    end

    def canceling?
      status == Connectors::SyncStatus::CANCELING
    end

    # @return the +connector+ entry of the job document
    def connector_snapshot
      self[:connector]
    end

    def connector_id
      connector_snapshot[:id]
    end

    # Reads the snapshot's +index_name+ entry (previously this returned the
    # snapshot's +configuration+, duplicating #configuration).
    def index_name
      connector_snapshot[:index_name]
    end

    def language
      connector_snapshot[:language]
    end

    def service_type
      connector_snapshot[:service_type]
    end

    def configuration
      connector_snapshot[:configuration]
    end

    # @return the result of Utility::Filtering.extract_filter applied to the snapshot's +filtering+ entry
    def filtering
      Utility::Filtering.extract_filter(connector_snapshot[:filtering])
    end

    def pipeline
      connector_snapshot[:pipeline]
    end

    # Memoized ConnectorSettings for this job's connector id.
    def connector
      @connector ||= ConnectorSettings.fetch_by_id(connector_id)
    end

    # Drops the memoized connector and fetches it again.
    def reload_connector!
      @connector = nil
      connector
    end

    # Re-reads this job from Elasticsearch and clears the memoized connector.
    #
    # @return [nil]
    # @raise [ConnectorJobNotFoundError] when the response's +:found+ entry is falsy
    def reload
      es_response = ElasticConnectorActions.get_job(id)
      raise ConnectorJobNotFoundError, "Connector job with id=#{id} was not found." unless es_response[:found]

      # TODO: remove the usage of with_indifferent_access. get_id method is expected to return a hash
      @elasticsearch_response = es_response.with_indifferent_access
      @connector = nil
    end

    private

    def initialize(es_response)
      # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
      @elasticsearch_response = es_response.with_indifferent_access
    end
  end
end
|
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/registry'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -19,6 +20,8 @@ module Core
|
|
19
20
|
DEFAULT_REDUCE_WHITESPACE = true
|
20
21
|
DEFAULT_RUN_ML_INFERENCE = true
|
21
22
|
|
23
|
+
DEFAULT_FILTERING = {}
|
24
|
+
|
22
25
|
DEFAULT_PAGE_SIZE = 100
|
23
26
|
|
24
27
|
# Error Classes
|
@@ -32,13 +35,15 @@ module Core
|
|
32
35
|
new(es_response, connectors_meta)
|
33
36
|
end
|
34
37
|
|
35
|
-
def initialize(es_response, connectors_meta)
|
36
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
37
|
-
@connectors_meta = connectors_meta.with_indifferent_access
|
38
|
-
end
|
39
|
-
|
40
38
|
def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
|
41
|
-
query = {
|
39
|
+
query = {
|
40
|
+
bool: {
|
41
|
+
filter: [
|
42
|
+
{ term: { is_native: true } },
|
43
|
+
{ terms: { service_type: Connectors::REGISTRY.registered_connectors } }
|
44
|
+
]
|
45
|
+
}
|
46
|
+
}
|
42
47
|
fetch_connectors_by_query(query, page_size)
|
43
48
|
end
|
44
49
|
|
@@ -80,20 +85,27 @@ module Core
|
|
80
85
|
self[:scheduling]
|
81
86
|
end
|
82
87
|
|
88
|
+
def filtering
|
89
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
90
|
+
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
91
|
+
|
92
|
+
Utility::Filtering.extract_filter(filtering)
|
93
|
+
end
|
94
|
+
|
83
95
|
def request_pipeline
|
84
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
96
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
85
97
|
end
|
86
98
|
|
87
99
|
def extract_binary_content?
|
88
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
100
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
89
101
|
end
|
90
102
|
|
91
103
|
def reduce_whitespace?
|
92
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
104
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
93
105
|
end
|
94
106
|
|
95
107
|
def run_ml_inference?
|
96
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
108
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
97
109
|
end
|
98
110
|
|
99
111
|
def formatted
|
@@ -112,6 +124,11 @@ module Core
|
|
112
124
|
|
113
125
|
private
|
114
126
|
|
127
|
+
def initialize(es_response, connectors_meta)
|
128
|
+
@elasticsearch_response = es_response.with_indifferent_access
|
129
|
+
@connectors_meta = connectors_meta.with_indifferent_access
|
130
|
+
end
|
131
|
+
|
115
132
|
def self.fetch_connectors_by_query(query, page_size)
|
116
133
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
117
134
|
|
@@ -120,8 +137,8 @@ module Core
|
|
120
137
|
loop do
|
121
138
|
response = ElasticConnectorActions.search_connectors(query, page_size, offset)
|
122
139
|
|
123
|
-
hits = response
|
124
|
-
total = response
|
140
|
+
hits = response.dig('hits', 'hits') || []
|
141
|
+
total = response.dig('hits', 'total', 'value') || 0
|
125
142
|
results += hits.map do |hit|
|
126
143
|
Core::ConnectorSettings.new(hit, connectors_meta)
|
127
144
|
end
|
@@ -132,11 +149,5 @@ module Core
|
|
132
149
|
results
|
133
150
|
end
|
134
151
|
|
135
|
-
def return_if_present(*args)
|
136
|
-
args.each do |arg|
|
137
|
-
return arg unless arg.nil?
|
138
|
-
end
|
139
|
-
nil
|
140
|
-
end
|
141
152
|
end
|
142
153
|
end
|