connectors_service 8.5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/bin/connectors_service +4 -0
- data/bin/list_connectors +4 -0
- data/config/connectors.yml +25 -0
- data/lib/app/app.rb +25 -0
- data/lib/app/config.rb +132 -0
- data/lib/app/console_app.rb +278 -0
- data/lib/app/dispatcher.rb +121 -0
- data/lib/app/menu.rb +104 -0
- data/lib/app/preflight_check.rb +134 -0
- data/lib/app/version.rb +10 -0
- data/lib/connectors/base/adapter.rb +119 -0
- data/lib/connectors/base/connector.rb +57 -0
- data/lib/connectors/base/custom_client.rb +111 -0
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +32 -0
- data/lib/connectors/example/connector.rb +57 -0
- data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
- data/lib/connectors/gitlab/adapter.rb +50 -0
- data/lib/connectors/gitlab/connector.rb +67 -0
- data/lib/connectors/gitlab/custom_client.rb +44 -0
- data/lib/connectors/gitlab/extractor.rb +69 -0
- data/lib/connectors/mongodb/connector.rb +138 -0
- data/lib/connectors/registry.rb +52 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors.rb +16 -0
- data/lib/connectors_app/// +13 -0
- data/lib/connectors_service.rb +24 -0
- data/lib/connectors_utility.rb +16 -0
- data/lib/core/configuration.rb +48 -0
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/heartbeat.rb +32 -0
- data/lib/core/native_scheduler.rb +24 -0
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/output_sink.rb +13 -0
- data/lib/core/scheduler.rb +158 -0
- data/lib/core/single_scheduler.rb +29 -0
- data/lib/core/sync_job_runner.rb +111 -0
- data/lib/core.rb +16 -0
- data/lib/list_connectors.rb +22 -0
- data/lib/stubs/app_config.rb +35 -0
- data/lib/stubs/connectors/stats.rb +35 -0
- data/lib/stubs/service_type.rb +13 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/elasticsearch/index/language_data.yml +111 -0
- data/lib/utility/elasticsearch/index/mappings.rb +104 -0
- data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +84 -0
- data/lib/utility/middleware/basic_auth.rb +27 -0
- data/lib/utility/middleware/bearer_auth.rb +27 -0
- data/lib/utility/middleware/restrict_hostnames.rb +73 -0
- data/lib/utility.rb +16 -0
- metadata +487 -0
@@ -0,0 +1,142 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/connector_status'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Read-only wrapper around one connector document from the connectors index,
  # paired with the `_meta` section of that index's mappings. Exposes the
  # document's properties and resolves ingest-pipeline settings with fallbacks.
  class ConnectorSettings
    DEFAULT_REQUEST_PIPELINE = 'ent-search-generic-ingestion'
    DEFAULT_EXTRACT_BINARY_CONTENT = true
    DEFAULT_REDUCE_WHITESPACE = true
    DEFAULT_RUN_ML_INFERENCE = true

    DEFAULT_PAGE_SIZE = 100

    # Error Classes
    class ConnectorNotFoundError < StandardError; end

    # Loads a single connector by document id.
    #
    # @param connector_id [String] id of the connector document
    # @return [ConnectorSettings]
    # @raise [ConnectorNotFoundError] when the lookup response is not flagged as found
    def self.fetch_by_id(connector_id)
      es_response = ElasticConnectorActions.get_connector(connector_id)
      # Check the lookup result first so a missing connector does not cost an extra mappings request.
      raise ConnectorNotFoundError, "Connector with id=#{connector_id} was not found." unless es_response[:found]

      new(es_response, ElasticConnectorActions.connectors_meta)
    end

    # @param page_size [Integer] number of connectors requested per search call
    # @return [Array<ConnectorSettings>] connectors whose `is_native` field is true
    def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
      fetch_connectors_by_query({ term: { is_native: true } }, page_size)
    end

    # @param page_size [Integer] number of connectors requested per search call
    # @return [Array<ConnectorSettings>] connectors whose service type is the crawler service type
    def self.fetch_crawler_connectors(page_size = DEFAULT_PAGE_SIZE)
      fetch_connectors_by_query({ term: { service_type: Utility::Constants::CRAWLER_SERVICE_TYPE } }, page_size)
    end

    # Pages through every connector matching +query+.
    #
    # NOTE: the original definition was placed below `private`, which does not
    # apply to `def self.` methods, so this method has always been publicly
    # callable. It is kept public here so no existing caller breaks.
    #
    # @param query [Hash] Elasticsearch query body
    # @param page_size [Integer] number of connectors requested per search call
    # @return [Array<ConnectorSettings>]
    def self.fetch_connectors_by_query(query, page_size)
      connectors_meta = ElasticConnectorActions.connectors_meta

      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_connectors(query, page_size, offset)

        # The search is issued with `ignore: 404`, so the response may carry no `hits` section at all.
        hits_section = response['hits'] || {}
        hits = hits_section['hits'] || []
        total = hits_section.dig('total', 'value') || 0

        results.concat(hits.map { |hit| new(hit, connectors_meta) })

        # An empty page means nothing more can be read, even if `total` claims otherwise
        # (without this guard the loop would never terminate).
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    # @param es_response [Hash] connector document as returned by Elasticsearch (`_id`, `_source`, ...)
    # @param connectors_meta [Hash] `_meta` section of the connectors index mappings
    def initialize(es_response, connectors_meta)
      @elasticsearch_response = es_response.with_indifferent_access
      @connectors_meta = connectors_meta.with_indifferent_access
    end

    # @return [String] document id of the connector
    def id
      @elasticsearch_response[:_id]
    end

    # Reads a property from the document's `_source`.
    #
    # @return [Object, nil] nil when the document has no `_source` or the property is absent
    def [](property_name)
      @elasticsearch_response.dig(:_source, property_name)
    end

    def index_name
      self[:index_name]
    end

    def connector_status
      self[:status]
    end

    # @return [Boolean] whether the current status is one of the statuses that allow syncing
    def connector_status_allows_sync?
      Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC.include?(connector_status)
    end

    def service_type
      self[:service_type]
    end

    def configuration
      self[:configuration]
    end

    def scheduling_settings
      self[:scheduling]
    end

    # Pipeline settings resolve in this order: the connector document's own
    # `pipeline` object, then the index `_meta` default, then the constant.
    def request_pipeline
      pipeline_setting(:name, :default_name, DEFAULT_REQUEST_PIPELINE)
    end

    def extract_binary_content?
      pipeline_setting(:extract_binary_content, :default_extract_binary_content, DEFAULT_EXTRACT_BINARY_CONTENT)
    end

    def reduce_whitespace?
      pipeline_setting(:reduce_whitespace, :default_reduce_whitespace, DEFAULT_REDUCE_WHITESPACE)
    end

    def run_ml_inference?
      pipeline_setting(:run_ml_inference, :default_run_ml_inference, DEFAULT_RUN_ML_INFERENCE)
    end

    # @return [String] short human-readable description, e.g. "connector (ID: 1, Service type: x)"
    def formatted
      properties = ["ID: #{id}"]
      properties << "Service type: #{service_type}" if service_type
      "connector (#{properties.join(', ')})"
    end

    # @return [Boolean] true when the service type is nil, empty or whitespace only
    def needs_service_type?
      service_type.to_s.strip.empty?
    end

    # @return [Boolean, nil] nil when the connector has no index name
    def valid_index_name?
      index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
    end

    private

    # Looks a pipeline setting up inside the document's `_source` (where all other
    # connector properties live), falling back to the index meta and then +fallback+.
    def pipeline_setting(setting, meta_default_key, fallback)
      return_if_present(
        @elasticsearch_response.dig(:_source, :pipeline, setting),
        @connectors_meta.dig(:pipeline, meta_default_key),
        fallback
      )
    end

    # Returns the first argument that is not nil (so an explicit `false` wins), or nil.
    def return_if_present(*args)
      args.find { |arg| !arg.nil? }
    end
  end
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
#
|
9
|
+
require 'active_support/core_ext/hash'
require 'connectors/connector_status'
require 'connectors/sync_status'
require 'socket'
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Class-level helpers that wrap the Elasticsearch calls made against the
  # connectors index, the sync-job index and connector content indices.
  class ElasticConnectorActions
    class << self
      # Enables scheduling and raises the `sync_now` flag on the connector document.
      def force_sync(connector_id)
        update_connector_fields(connector_id, :scheduling => { :enabled => true }, :sync_now => true)
      end

      # Indexes a new connector document.
      #
      # @return [String] id of the created document
      def create_connector(index_name, service_type)
        body = {
          :scheduling => { :enabled => true },
          :index_name => index_name,
          :service_type => service_type
        }
        response = client.index(:index => Utility::Constants::CONNECTORS_INDEX, :body => body)
        response['_id']
      end

      # Fetches a connector document; a 404 is ignored, so callers must inspect
      # the returned body (e.g. its `found` flag) instead of rescuing.
      def get_connector(connector_id)
        client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
      end

      # @return [Hash] `_meta` of the highest-versioned index behind the connectors alias, or {} when absent
      def connectors_meta
        alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
        index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
        alias_mappings.dig(index, 'mappings', '_meta') || {}
      end

      # Runs one page of a connector search, sorted by `name`. A 404 is ignored.
      #
      # @param query [Hash] Elasticsearch query body
      # @param page_size [Integer] value sent as `size`
      # @param offset [Integer] value sent as `from`
      def search_connectors(query, page_size, offset)
        client.search(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :ignore => 404,
          :body => {
            :size => page_size,
            :from => offset,
            :query => query,
            :sort => ['name']
          }
        )
      end

      def update_connector_configuration(connector_id, configuration)
        update_connector_fields(connector_id, :configuration => configuration)
      end

      def enable_connector_scheduling(connector_id, cron_expression)
        payload = { :enabled => true, :interval => cron_expression }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      def disable_connector_scheduling(connector_id)
        payload = { :enabled => false }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      # Writes a single `{ value, label }` entry under the connector's configuration.
      def set_configurable_field(connector_id, field_name, label, value)
        payload = { field_name => { :value => value, :label => label } }
        update_connector_configuration(connector_id, payload)
      end

      # Marks the connector as syncing and records a new job document.
      #
      # @return [String] id of the created job document
      def claim_job(connector_id)
        # One timestamp for both documents so the connector and its job agree on the start time.
        claimed_at = Time.now

        update_connector_fields(connector_id,
                                :sync_now => false,
                                :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
                                :last_synced => claimed_at)

        body = {
          :connector_id => connector_id,
          :status => Connectors::SyncStatus::IN_PROGRESS,
          :worker_hostname => Socket.gethostname,
          :created_at => claimed_at
        }
        job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)

        job['_id']
      end

      # @raise [ArgumentError] when +status+ is the error status and no message is given
      def update_connector_status(connector_id, status, error_message = nil)
        is_error = status == Connectors::ConnectorStatus::ERROR
        raise ArgumentError, 'error_message is required when status is error' if is_error && error_message.nil?

        # The error field is cleared (nil) for every non-error status.
        update_connector_fields(connector_id, { :status => status, :error => is_error ? error_message : nil })
      end

      # Records the outcome of a sync on both the connector and the job document.
      #
      # @param status [Hash] reads `:error`, `:indexed_document_count` and `:deleted_document_count`;
      #   the whole hash is also merged into the job document
      def complete_sync(connector_id, job_id, status)
        sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
        # One timestamp for both documents so `last_synced` and `completed_at` match.
        completed_at = Time.now

        update_connector_fields(connector_id,
                                :last_sync_status => sync_status,
                                :last_sync_error => status[:error],
                                :error => status[:error],
                                :last_synced => completed_at,
                                :last_indexed_document_count => status[:indexed_document_count],
                                :last_deleted_document_count => status[:deleted_document_count])

        body = {
          :doc => {
            :status => sync_status,
            :completed_at => completed_at
          }.merge(status)
        }
        client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
      end

      # Collects the `_id` of every document in +index_name+ using a
      # point-in-time search paged with `search_after`.
      #
      # @return [Array<String>]
      def fetch_document_ids(index_name)
        page_size = 1000
        result = []
        pit_id = nil
        begin
          pit_id = client.open_point_in_time(:index => index_name, :keep_alive => '1m', :expand_wildcards => 'all')['id']
          body = {
            :query => { :match_all => {} },
            :sort => [{ :id => { :order => :asc } }],
            :pit => {
              :id => pit_id,
              :keep_alive => '1m'
            },
            :size => page_size,
            :_source => false
          }
          loop do
            response = client.search(:body => body)
            # Each response may carry a new point-in-time id; remember the latest
            # one so that it is the one used next and the one closed at the end.
            pit_id = response['pit_id'] || pit_id
            hits = response['hits']['hits']

            result.concat(hits.map { |h| h['_id'] })
            break if hits.size < page_size

            body[:search_after] = hits.last['sort']
            body[:pit][:id] = pit_id
          end
        ensure
          # Only close what was actually opened; otherwise a failure to open the
          # point in time would be masked by a second error raised from here.
          client.close_point_in_time(:index => index_name, :body => { :id => pit_id }) if pit_id
        end

        result
      end

      # Ensures a content index exists, built from the text-analysis settings and default text-field mappings.
      def ensure_content_index_exists(index_name, use_icu_locale = false, language_code = nil)
        settings = Utility::Elasticsearch::Index::TextAnalysisSettings.new(:language_code => language_code, :analysis_icu => use_icu_locale).to_h
        mappings = Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(:connectors_index => true)

        body_payload = { settings: settings, mappings: mappings }
        ensure_index_exists(index_name, body_payload)
      end

      # Creates the index from +body+ when it does not exist. When it exists and
      # +body+ has mappings, those are applied only if the index has none yet.
      def ensure_index_exists(index_name, body = {})
        if client.indices.exists?(:index => index_name)
          return unless body[:mappings]

          Utility::Logger.debug("Index #{index_name} already exists. Checking mappings...")
          Utility::Logger.debug("New mappings: #{body[:mappings]}")
          response = client.indices.get_mapping(:index => index_name)
          existing = response[index_name]['mappings']
          if existing.empty?
            Utility::Logger.debug("Index #{index_name} has no mappings. Adding mappings...")
            client.indices.put_mapping(:index => index_name, :body => body[:mappings], :expand_wildcards => 'all')
            Utility::Logger.debug("Index #{index_name} mappings added.")
          else
            Utility::Logger.debug("Index #{index_name} already has mappings: #{existing}. Skipping...")
          end
        else
          client.indices.create(:index => index_name, :body => body)
          Utility::Logger.debug("Created index #{index_name}")
        end
      end

      # Builds the creation body for a hidden index.
      #
      # @param alias_name [String, nil] when non-empty, registered as a write alias
      # @param mappings [Hash, nil] when given, included as the index mappings
      # @return [Hash]
      def system_index_body(alias_name: nil, mappings: nil)
        body = {
          :settings => {
            :index => {
              :hidden => true,
              :number_of_replicas => 0,
              :auto_expand_replicas => '0-5'
            }
          }
        }
        body[:aliases] = { alias_name => { :is_write_index => true } } unless alias_name.nil? || alias_name.empty?
        body[:mappings] = mappings unless mappings.nil?
        body
      end

      # DO NOT USE this method
      # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_connectors_index_exists
        mappings = {
          :properties => {
            :api_key_id => { :type => :keyword },
            :configuration => { :type => :object },
            :error => { :type => :text },
            :index_name => { :type => :keyword },
            :last_seen => { :type => :date },
            :last_synced => { :type => :date },
            :last_indexed_document_count => { :type => :integer },
            :last_deleted_document_count => { :type => :integer },
            :scheduling => {
              :properties => {
                :enabled => { :type => :boolean },
                :interval => { :type => :text }
              }
            },
            :service_type => { :type => :keyword },
            :status => { :type => :keyword },
            :sync_error => { :type => :text },
            :sync_now => { :type => :boolean },
            :sync_status => { :type => :keyword }
          }
        }
        ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
      end

      # DO NOT USE this method
      # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_job_index_exists
        mappings = {
          :properties => {
            :connector_id => { :type => :keyword },
            :status => { :type => :keyword },
            :error => { :type => :text },
            :worker_hostname => { :type => :keyword },
            :indexed_document_count => { :type => :integer },
            :deleted_document_count => { :type => :integer },
            :created_at => { :type => :date },
            :completed_at => { :type => :date }
          }
        }
        ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
      end

      # Partially updates the connector document; does nothing for an empty +doc+.
      # The update refreshes the index and retries up to 3 times on version conflicts.
      def update_connector_fields(connector_id, doc = {})
        return if doc.empty?

        client.update(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :id => connector_id,
          :body => { :doc => doc },
          :refresh => true,
          :retry_on_conflict => 3
        )
      end

      private

      # Elasticsearch client, built once from the application configuration.
      def client
        @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
      end

      # Picks the index with the largest numeric `-v<N>` suffix.
      #
      # @param alias_name [String]
      # @param indices [Array<String>] concrete index names behind the alias
      # @return [String] "<alias_name>-v<highest N>"
      def get_latest_index_in_alias(alias_name, indices)
        latest_version = indices.map { |index| index.gsub("#{alias_name}-v", '').to_i }.max
        "#{alias_name}-v#{latest_version}"
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
13
|
+
|
14
|
+
module Core
  # Publishes a liveness heartbeat for a connector into its document.
  class Heartbeat
    class << self
      # Writes `last_seen` for the connector and, when its status allows a sync,
      # also the result of the connector's health check (`status` and `error`).
      #
      # NOTE(review): this singleton method shadows Object#send on the class itself.
      #
      # @param connector_settings [Core::ConnectorSettings]
      def send(connector_settings)
        heartbeat = { :last_seen => Time.now }

        if connector_settings.connector_status_allows_sync?
          connector = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
          healthy = connector.is_healthy?
          failure_message = "Health check for 3d party service failed for connector [#{connector_settings.id}], service type [#{connector_settings.service_type}]. Check the application logs for more information."

          heartbeat[:status] = healthy ? Connectors::ConnectorStatus::CONNECTED : Connectors::ConnectorStatus::ERROR
          # A healthy check clears any previously stored error.
          heartbeat[:error] = healthy ? nil : failure_message
        end

        Core::ElasticConnectorActions.update_connector_fields(connector_settings.id, heartbeat)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/scheduler'
|
10
|
+
require 'core/connector_settings'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'utility/exception_tracking'
|
14
|
+
|
15
|
+
module Core
  # Scheduler whose connector list is the set of native connectors.
  class NativeScheduler < Core::Scheduler
    # @return [Array<Core::ConnectorSettings>] the native connectors, or an empty
    #   array when the lookup returns nothing or raises a StandardError (which is logged)
    def connector_settings
      native_connectors = Core::ConnectorSettings.fetch_native_connectors
      native_connectors || []
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, 'Could not retrieve native connectors due to unexpected error.')
      []
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Core
  module OutputSink
    # Abstract sink interface. Every operation raises a RuntimeError with the
    # message 'not implemented' until a subclass overrides it.
    class BaseSink
      # @param _document [Object] a single document to ingest
      def ingest(_document)
        raise 'not implemented'
      end

      # @param _documents [Enumerable] several documents to ingest
      def ingest_multiple(_documents)
        raise 'not implemented'
      end

      # @param _id [Object] id of a single document to delete
      def delete(_id)
        raise 'not implemented'
      end

      # @param _ids [Enumerable] ids of several documents to delete
      def delete_multiple(_ids)
        raise 'not implemented'
      end

      # Accepts the `size:` keyword used by the concrete sinks, so calling
      # `flush(size: n)` on a sink that does not override it reports
      # 'not implemented' rather than an unknown-keyword ArgumentError.
      # The historical `_size:` keyword is still accepted.
      def flush(size: nil, _size: nil)
        raise 'not implemented'
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink/base_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
  # Sink that forwards every operation, in order, to each sink it was built with.
  class CombinedSink < Core::OutputSink::BaseSink
    # @param sinks [Array<Core::OutputSink::BaseSink>] sinks that receive each operation
    def initialize(sinks = [])
      @sinks = sinks
    end

    def ingest(document)
      broadcast(:ingest, document)
    end

    def flush(size: nil)
      broadcast(:flush, size: size)
    end

    def ingest_multiple(documents)
      broadcast(:ingest_multiple, documents)
    end

    def delete(id)
      broadcast(:delete, id)
    end

    def delete_multiple(ids)
      broadcast(:delete_multiple, ids)
    end

    private

    # Calls +operation+ with the given arguments on every wrapped sink, in order.
    def broadcast(operation, *args, **kwargs)
      @sinks.each { |target| target.public_send(operation, *args, **kwargs) }
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Core::OutputSink
  # Sink that prints every operation to standard output instead of storing it.
  class ConsoleSink < Core::OutputSink::BaseSink
    def ingest(document)
      announce('Got a single document:')
      puts document
    end

    def flush(size: nil)
      announce('Flushing')
      puts "Flush size: #{size}"
    end

    def ingest_multiple(documents)
      announce('Got multiple documents:')
      puts documents
    end

    def delete(id)
      announce("Deleting single id: #{id}")
      puts id
    end

    def delete_multiple(ids)
      announce("Deleting several ids: #{ids}")
      puts ids
    end

    private

    # Prints the dashed separator line.
    def print_delim
      puts '----------------------------------------------------'
    end

    # Prints +header+ framed by a separator line above and below.
    def print_header(header)
      print_delim
      puts header
      print_delim
    end

    alias announce print_header
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/numeric/time'
|
10
|
+
require 'app/config'
|
11
|
+
require 'core/output_sink/base_sink'
|
12
|
+
require 'utility/es_client'
|
13
|
+
require 'utility/logger'
|
14
|
+
|
15
|
+
module Core::OutputSink
  # Sink that queues index/delete operations and sends them to Elasticsearch
  # through the bulk API once the queue reaches the flush threshold.
  class EsSink < Core::OutputSink::BaseSink
    # @param index_name [String] index that receives the operations
    # @param request_pipeline [String] ingest pipeline passed with each bulk request
    # @param flush_threshold [Integer] queue length that triggers an automatic flush
    def initialize(index_name, request_pipeline, flush_threshold = 50)
      super()
      @client = Utility::EsClient.new(App::Config[:elasticsearch])
      @index_name = index_name
      @request_pipeline = request_pipeline
      @operation_queue = []
      @flush_threshold = flush_threshold
    end

    # Queues an index operation keyed by `document[:id]`; blank documents are skipped.
    def ingest(document)
      return if document.blank?

      @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
      flush if ready_to_flush?
    end

    # Queues a delete operation; a nil id is skipped.
    def delete(doc_id)
      return if doc_id.nil?

      @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
      flush if ready_to_flush?
    end

    # Sends everything that is queued, in batches.
    #
    # @param size [Integer, nil] batch size; defaults to the flush threshold
    def flush(size: nil)
      # A batch size below 1 would never shrink the queue, so clamp it.
      batch_size = [size || @flush_threshold, 1].max

      # Drain from the head of the queue so batches leave in the order the
      # operations were queued (taking from the tail reversed the batch order).
      send_data(@operation_queue.shift(batch_size)) until @operation_queue.empty?
    end

    # Queues every document of +documents+; nil is treated as an empty list.
    def ingest_multiple(documents)
      Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
      (documents || []).each { |doc| ingest(doc) }
    end

    # Queues a delete for every id of +ids+; nil is treated as an empty list.
    def delete_multiple(ids)
      Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
      (ids || []).each { |id| delete(id) }
    end

    private

    attr_accessor :index_name

    # Issues one bulk request for +ops+ through the configured pipeline.
    def send_data(ops)
      return if ops.empty?

      @client.bulk(:body => ops, :pipeline => @request_pipeline)
      Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
    end

    def ready_to_flush?
      @operation_queue.size >= @flush_threshold
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/output_sink/es_sink'
|
10
|
+
require 'core/output_sink/console_sink'
|
11
|
+
require 'core/output_sink/combined_sink'
|
12
|
+
|
13
|
+
# Namespace for the output sinks; the requires above load the Elasticsearch,
# console and combined sink implementations.
module Core::OutputSink
end
|