connectors_utility 8.4.0.1 → 8.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +26 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors_utility.rb +8 -2
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/scheduler.rb +138 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +58 -0
- data/lib/utility.rb +16 -0
- metadata +118 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1aa92692e811978885951049ebd040709d0f286ce4498d93dce54189ff7b8135
|
4
|
+
data.tar.gz: deda3bf664bb03c0b41e75acfa68b4cfa0abce03fd9fd32909c2e0a130686d14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 648d92295a527ccb6541596e030322cd9d7f6d8858edb90981f635f00f83083667e1e29837649b3310226ba180d0dd2316dc6bcb3bd12d53d4ec7c27f0eeb287
|
7
|
+
data.tar.gz: ac6c2c9b5f1bdf0334f0e7379fe0cddcb7a9cf1d32ed6ad7843da9292cd1e5c025cdd31b10f7c45a7a0d5c23969879ddeee9d7938ecc8a13b4ed577696ba0e08
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
  # String values used for a connector's `status` field.
  class ConnectorStatus
    CREATED = 'created'
    NEEDS_CONFIGURATION = 'needs_configuration'
    CONFIGURED = 'configured'
    CONNECTED = 'connected'
    ERROR = 'error'

    # Every status value defined above. Frozen so that no caller can mutate
    # the list shared by the whole process.
    STATUSES = [
      CREATED,
      NEEDS_CONFIGURATION,
      CONFIGURED,
      CONNECTED,
      ERROR
    ].freeze

    # Statuses in which a connector is allowed to start a sync.
    # Frozen for the same reason as STATUSES.
    STATUSES_ALLOWING_SYNC = [
      CONFIGURED,
      CONNECTED,
      ERROR
    ].freeze
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/scheduler'
|
10
|
+
require 'core/connector_settings'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'utility/exception_tracking'
|
14
|
+
|
15
|
+
module Connectors
  module Crawler
    # Scheduler whose connector list is the set of crawler connectors.
    class Scheduler < Core::Scheduler
      # Returns the crawler connectors reported by Core::ConnectorSettings.
      # Falls back to an empty array when the lookup yields nil or raises a
      # StandardError; in the latter case the error is logged first.
      def connector_settings
        begin
          crawler_connectors = Core::ConnectorSettings.fetch_crawler_connectors
          return crawler_connectors || []
        rescue StandardError => error
          Utility::ExceptionTracking.log_exception(error, 'Could not retrieve Crawler connectors due to unexpected error.')
        end

        []
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
  # String values used for the status of a sync.
  class SyncStatus
    COMPLETED = 'completed'
    IN_PROGRESS = 'in_progress'
    FAILED = 'failed'

    # Every status value defined above. Frozen so that no caller can mutate
    # the list shared by the whole process.
    STATUSES = [
      COMPLETED,
      IN_PROGRESS,
      FAILED
    ].freeze
  end
end
|
data/lib/connectors_utility.rb
CHANGED
@@ -6,5 +6,11 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require_relative 'utility
|
10
|
-
|
9
|
+
require_relative 'utility'
|
10
|
+
|
11
|
+
require_relative 'connectors/connector_status'
|
12
|
+
require_relative 'connectors/sync_status'
|
13
|
+
require_relative 'core/scheduler'
|
14
|
+
require_relative 'core/elastic_connector_actions'
|
15
|
+
|
16
|
+
require_relative 'connectors/crawler/scheduler'
|
@@ -0,0 +1,142 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/connector_status'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Read-only view over one connector document (an Elasticsearch get response
  # or search hit) combined with the defaults kept in the connectors index
  # mapping `_meta`.
  class ConnectorSettings
    DEFAULT_REQUEST_PIPELINE = 'ent-search-generic-ingestion'
    DEFAULT_EXTRACT_BINARY_CONTENT = true
    DEFAULT_REDUCE_WHITESPACE = true
    DEFAULT_RUN_ML_INFERENCE = true

    DEFAULT_PAGE_SIZE = 100

    # Error Classes
    class ConnectorNotFoundError < StandardError; end

    # Loads a single connector by document id.
    # Raises ConnectorNotFoundError when the get response is not marked as found.
    def self.fetch_by_id(connector_id)
      es_response = ElasticConnectorActions.get_connector(connector_id)
      connectors_meta = ElasticConnectorActions.connectors_meta

      raise ConnectorNotFoundError, "Connector with id=#{connector_id} was not found." unless es_response[:found]

      new(es_response, connectors_meta)
    end

    # Returns all connectors whose `is_native` field is true.
    def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
      query = { term: { is_native: true } }
      fetch_connectors_by_query(query, page_size)
    end

    # Returns all connectors whose service type is the crawler service type.
    def self.fetch_crawler_connectors(page_size = DEFAULT_PAGE_SIZE)
      query = { term: { service_type: Utility::Constants::CRAWLER_SERVICE_TYPE } }
      fetch_connectors_by_query(query, page_size)
    end

    # Pages through every connector matching +query+, +page_size+ hits at a
    # time, and wraps each hit in a ConnectorSettings.
    #
    # NOTE: a bare `private` does not apply to `def self.` methods, so this
    # helper has always been publicly callable; it is kept that way so existing
    # callers keep working.
    def self.fetch_connectors_by_query(query, page_size)
      connectors_meta = ElasticConnectorActions.connectors_meta

      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_connectors(query, page_size, offset)

        # The search is issued with :ignore => 404, so the response may carry
        # no 'hits' section at all; treat that as "no connectors".
        hits_section = response['hits'] || {}
        hits = hits_section['hits'] || []
        total = hits_section.dig('total', 'value') || 0

        results.concat(hits.map { |hit| new(hit, connectors_meta) })

        # An empty page means there is nothing more to read. Without this
        # guard the offset would stop advancing and the loop would never end
        # whenever fewer documents than `total` can be read.
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    def initialize(es_response, connectors_meta)
      @elasticsearch_response = es_response.with_indifferent_access
      @connectors_meta = connectors_meta.with_indifferent_access
    end

    def id
      @elasticsearch_response[:_id]
    end

    # Reads a property from the connector document's `_source`.
    # Returns nil when the property, or `_source` itself, is missing.
    def [](property_name)
      @elasticsearch_response.dig(:_source, property_name)
    end

    def index_name
      self[:index_name]
    end

    def connector_status
      self[:status]
    end

    def connector_status_allows_sync?
      Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC.include?(connector_status)
    end

    def service_type
      self[:service_type]
    end

    def configuration
      self[:configuration]
    end

    def scheduling_settings
      self[:scheduling]
    end

    # Each pipeline reader resolves its value in this order: the connector
    # document, then the index `_meta` default, then the class-level default.
    def request_pipeline
      return_if_present(pipeline_setting(:name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
    end

    def extract_binary_content?
      return_if_present(pipeline_setting(:extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
    end

    def reduce_whitespace?
      return_if_present(pipeline_setting(:reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
    end

    def run_ml_inference?
      return_if_present(pipeline_setting(:run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
    end

    # Short human-readable description, e.g. for log messages.
    def formatted
      properties = ["ID: #{id}"]
      properties << "Service type: #{service_type}" if service_type
      "connector (#{properties.join(', ')})"
    end

    def needs_service_type?
      service_type.to_s.strip.empty?
    end

    # Truthy when the index name starts with the content index prefix;
    # nil when the connector has no index name.
    def valid_index_name?
      index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
    end

    private

    # Reads one key of the connector's `pipeline` object. The object is looked
    # up under `_source`, the same place `[]` reads every other connector
    # property from; a lookup at the top level of the response never matches.
    def pipeline_setting(key)
      @elasticsearch_response.dig(:_source, :pipeline, key)
    end

    # Returns the first argument that is not nil (false counts as a value),
    # or nil when all of them are nil.
    def return_if_present(*args)
      args.find { |arg| !arg.nil? }
    end
  end
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
#
|
9
|
+
require 'socket'

require 'active_support/core_ext/hash'
require 'connectors/connector_status'
require 'connectors/sync_status'
require 'utility'
|
13
|
+
|
14
|
+
module Core
  # Class-level helpers that read and write connector documents, sync-job
  # documents and their indices through the Elasticsearch client.
  class ElasticConnectorActions
    class << self

      # Enables scheduling and sets the sync_now flag on the connector document.
      def force_sync(connector_id)
        update_connector_fields(connector_id, :scheduling => { :enabled => true }, :sync_now => true)
      end

      # Indexes a new connector document and returns its generated id.
      def create_connector(index_name, service_type)
        body = {
          :scheduling => { :enabled => true },
          :index_name => index_name,
          :service_type => service_type
        }
        response = client.index(:index => Utility::Constants::CONNECTORS_INDEX, :body => body)
        response['_id']
      end

      # Fetches the connector document. 404 responses are not raised
      # (:ignore => 404), so callers must inspect the returned body themselves.
      def get_connector(connector_id)
        client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
      end

      # Returns the `_meta` section of the mappings of the highest-versioned
      # index behind the connectors alias, or {} when there is none.
      def connectors_meta
        alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
        index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
        alias_mappings.dig(index, 'mappings', '_meta') || {}
      end

      # Runs +query+ against the connectors index, sorted by name, returning
      # +page_size+ hits starting at +offset+. 404 responses are not raised.
      def search_connectors(query, page_size, offset)
        client.search(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :ignore => 404,
          :body => {
            :size => page_size,
            :from => offset,
            :query => query,
            :sort => ['name']
          }
        )
      end

      def update_connector_configuration(connector_id, configuration)
        update_connector_fields(connector_id, :configuration => configuration)
      end

      def enable_connector_scheduling(connector_id, cron_expression)
        payload = { :enabled => true, :interval => cron_expression }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      def disable_connector_scheduling(connector_id)
        payload = { :enabled => false }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      # Writes a single configuration entry ({ value, label }) under +field_name+.
      def set_configurable_field(connector_id, field_name, label, value)
        payload = { field_name => { :value => value, :label => label } }
        update_connector_configuration(connector_id, payload)
      end

      # Marks the connector as syncing (clearing sync_now) and creates an
      # in-progress job document. Returns the id of the new job document.
      def claim_job(connector_id)
        update_connector_fields(connector_id,
                                :sync_now => false,
                                :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
                                :last_synced => Time.now)

        body = {
          :connector_id => connector_id,
          :status => Connectors::SyncStatus::IN_PROGRESS,
          :worker_hostname => Socket.gethostname,
          :created_at => Time.now
        }
        job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)

        job['_id']
      end

      # Updates the connector's status. The stored error is cleared unless the
      # new status is the error status.
      # Raises ArgumentError when status is error and no message is given.
      def update_connector_status(connector_id, status, error_message = nil)
        if status == Connectors::ConnectorStatus::ERROR && error_message.nil?
          raise ArgumentError, 'error_message is required when status is error'
        end
        body = {
          :status => status,
          :error => status == Connectors::ConnectorStatus::ERROR ? error_message : nil
        }
        update_connector_fields(connector_id, body)
      end

      # Records the outcome of a sync on both the connector document and the
      # job document. The sync counts as failed when status[:error] is set.
      def complete_sync(connector_id, job_id, status)
        sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED

        update_connector_fields(connector_id,
                                :last_sync_status => sync_status,
                                :last_sync_error => status[:error],
                                :error => status[:error],
                                :last_synced => Time.now,
                                :last_indexed_document_count => status[:indexed_document_count],
                                :last_deleted_document_count => status[:deleted_document_count])

        body = {
          :doc => {
            :status => sync_status,
            :completed_at => Time.now
          }.merge(status)
        }
        client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
      end

      # Returns the `_id` of every document in +index_name+, reading pages of
      # 1000 hits through a point-in-time with search_after.
      def fetch_document_ids(index_name)
        page_size = 1000
        result = []
        pit_id = nil
        begin
          pit_id = client.open_point_in_time(:index => index_name, :keep_alive => '1m', :expand_wildcards => 'all')['id']
          body = {
            :query => { :match_all => {} },
            :sort => [{ :id => { :order => :asc } }],
            :pit => {
              :id => pit_id,
              :keep_alive => '1m'
            },
            :size => page_size,
            :_source => false
          }
          loop do
            response = client.search(:body => body)
            # Keep track of the id returned by the latest search so that the
            # next page, and the final close, use the most recent one.
            pit_id = response['pit_id'] || pit_id
            hits = response['hits']['hits']

            result.concat(hits.map { |h| h['_id'] })
            break if hits.size < page_size

            body[:search_after] = hits.last['sort']
            body[:pit][:id] = pit_id
          end
        ensure
          # Nothing to close when opening the point-in-time itself failed;
          # closing with a nil id would only mask the original error.
          client.close_point_in_time(:index => index_name, :body => { :id => pit_id }) if pit_id
        end

        result
      end

      # Makes sure a content index exists, creating it with the text analysis
      # settings and default text field mappings when it does not.
      def ensure_content_index_exists(index_name, use_icu_locale = false, language_code = nil)
        settings = Utility::Elasticsearch::Index::TextAnalysisSettings.new(:language_code => language_code, :analysis_icu => use_icu_locale).to_h
        mappings = Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(:connectors_index => true)

        body_payload = { settings: settings, mappings: mappings }
        ensure_index_exists(index_name, body_payload)
      end

      # Creates +index_name+ with +body+ when it does not exist. When it exists
      # and +body+ carries mappings, those are applied only if the index
      # currently has no mappings at all; existing mappings are never changed.
      def ensure_index_exists(index_name, body = {})
        if client.indices.exists?(:index => index_name)
          return unless body[:mappings]
          Utility::Logger.debug("Index #{index_name} already exists. Checking mappings...")
          Utility::Logger.debug("New mappings: #{body[:mappings]}")
          response = client.indices.get_mapping(:index => index_name)
          existing = response[index_name]['mappings']
          if existing.empty?
            Utility::Logger.debug("Index #{index_name} has no mappings. Adding mappings...")
            client.indices.put_mapping(:index => index_name, :body => body[:mappings], :expand_wildcards => 'all')
            Utility::Logger.debug("Index #{index_name} mappings added.")
          else
            Utility::Logger.debug("Index #{index_name} already has mappings: #{existing}. Skipping...")
          end
        else
          client.indices.create(:index => index_name, :body => body)
          Utility::Logger.debug("Created index #{index_name}")
        end
      end

      # Builds the creation body for a hidden index, optionally registering it
      # as the write index of +alias_name+ and attaching +mappings+.
      def system_index_body(alias_name: nil, mappings: nil)
        body = {
          :settings => {
            :index => {
              :hidden => true,
              :number_of_replicas => 0,
              :auto_expand_replicas => '0-5'
            }
          }
        }
        body[:aliases] = { alias_name => { :is_write_index => true } } unless alias_name.nil? || alias_name.empty?
        body[:mappings] = mappings unless mappings.nil?
        body
      end

      # DO NOT USE this method
      # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_connectors_index_exists
        mappings = {
          :properties => {
            :api_key_id => { :type => :keyword },
            :configuration => { :type => :object },
            :error => { :type => :text },
            :index_name => { :type => :keyword },
            :last_seen => { :type => :date },
            :last_synced => { :type => :date },
            :last_indexed_document_count => { :type => :integer },
            :last_deleted_document_count => { :type => :integer },
            :scheduling => {
              :properties => {
                :enabled => { :type => :boolean },
                :interval => { :type => :text }
              }
            },
            :service_type => { :type => :keyword },
            :status => { :type => :keyword },
            :sync_error => { :type => :text },
            :sync_now => { :type => :boolean },
            :sync_status => { :type => :keyword }
          }
        }
        ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
      end

      # DO NOT USE this method
      # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_job_index_exists
        mappings = {
          :properties => {
            :connector_id => { :type => :keyword },
            :status => { :type => :keyword },
            :error => { :type => :text },
            :worker_hostname => { :type => :keyword },
            :indexed_document_count => { :type => :integer },
            :deleted_document_count => { :type => :integer },
            :created_at => { :type => :date },
            :completed_at => { :type => :date }
          }
        }
        ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
      end

      # Partially updates the connector document with +doc+, refreshing the
      # index and retrying up to 3 times on version conflicts.
      # Does nothing when +doc+ is empty.
      def update_connector_fields(connector_id, doc = {})
        return if doc.empty?
        client.update(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :id => connector_id,
          :body => { :doc => doc },
          :refresh => true,
          :retry_on_conflict => 3
        )
      end

      private

      # Memoized Elasticsearch client built from the application configuration.
      def client
        @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
      end

      # Given index names of the form "<alias_name>-v<N>", returns the name
      # with the largest N. Names without a numeric suffix count as version 0.
      def get_latest_index_in_alias(alias_name, indices)
        index_versions = indices.map { |index| index.delete_prefix("#{alias_name}-v").to_i }
        index_version = index_versions.max # gets the largest suffix number
        "#{alias_name}-v#{index_version}"
      end
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'fugit'
|
11
|
+
require 'core/connector_settings'
|
12
|
+
require 'utility/cron'
|
13
|
+
require 'utility/logger'
|
14
|
+
require 'utility/exception_tracking'
|
15
|
+
|
16
|
+
module Core
  # Base polling scheduler. Subclasses supply the connectors to look at via
  # #connector_settings; #when_triggered yields each connector together with
  # the kind of work that is due for it.
  class Scheduler
    # poll_interval is passed to sleep between polls; heartbeat_interval is
    # added to a connector's last_seen time to decide when a heartbeat is due.
    def initialize(poll_interval, heartbeat_interval)
      @poll_interval = poll_interval
      @heartbeat_interval = heartbeat_interval
      @is_shutting_down = false
    end

    # Must be overridden to return the connector settings to examine.
    def connector_settings
      raise 'Not implemented'
    end

    # Polls until #shutdown is called. For every connector it yields
    # (settings, :sync), (settings, :heartbeat) and/or (settings, :configuration)
    # depending on which triggers fire. Any StandardError raised during a poll
    # is logged and the loop continues with the next poll.
    def when_triggered
      loop do
        connector_settings.each do |cs|
          yield cs, :sync if sync_triggered?(cs)
          yield cs, :heartbeat if heartbeat_triggered?(cs)
          yield cs, :configuration if configuration_triggered?(cs)
        end
        break if @is_shutting_down
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
      ensure
        if @poll_interval > 0 && !@is_shutting_down
          Utility::Logger.info("Sleeping for #{@poll_interval} seconds in #{self.class}.")
          sleep(@poll_interval)
        end
      end
    end

    # Asks the polling loop to stop after the current iteration.
    def shutdown
      Utility::Logger.info("Shutting down scheduler #{self.class.name}.")
      @is_shutting_down = true
    end

    private

    # Decides whether a sync is due for the connector.
    def sync_triggered?(connector_settings)
      unless connector_settings.connector_status_allows_sync?
        Utility::Logger.info("Connector #{connector_settings.id} is in status \"#{connector_settings.connector_status}\" and won't sync yet. Connector needs to be in one of the following statuses: #{Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC} to run.")

        return false
      end

      # Sync when sync_now flag is true for the connector
      if connector_settings[:sync_now] == true
        Utility::Logger.info("Connector #{connector_settings.id} is manually triggered to sync now.")
        return true
      end

      # Don't sync if sync is explicitly disabled
      scheduling_settings = connector_settings.scheduling_settings
      unless scheduling_settings.present? && scheduling_settings[:enabled] == true
        Utility::Logger.info("Connector #{connector_settings.id} scheduling is disabled.")
        return false
      end

      # We want to sync when sync never actually happened
      last_synced = connector_settings[:last_synced]
      if last_synced.nil? || last_synced.empty?
        Utility::Logger.info("Connector #{connector_settings.id} has never synced yet, running initial sync.")
        return true
      end

      current_schedule = scheduling_settings[:interval]

      # Don't sync if there is no actual scheduling interval
      if current_schedule.nil? || current_schedule.empty?
        Utility::Logger.warn("No sync schedule configured for connector #{connector_settings.id}.")
        return false
      end

      current_schedule = begin
        Utility::Cron.quartz_to_crontab(current_schedule)
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
        return false
      end
      cron_parser = Fugit::Cron.parse(current_schedule)

      # Don't sync if the scheduling interval is non-parsable
      unless cron_parser
        Utility::Logger.error("Unable to parse sync schedule for connector #{connector_settings.id}: expression #{current_schedule} is not a valid Quartz Cron definition.")
        return false
      end

      # A malformed last_synced value must not raise out of this method:
      # the rescue in #when_triggered wraps the whole poll, so an exception
      # here would skip every connector that comes after this one.
      last_synced_time = begin
        Time.parse(last_synced)
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Unable to parse last_synced (#{last_synced}) for connector #{connector_settings.id}.")
        return false
      end

      next_trigger_time = cron_parser.next_time(last_synced_time)

      # Sync if next trigger for the connector is in past
      if next_trigger_time < Time.now
        Utility::Logger.info("Connector #{connector_settings.id} sync is triggered by cron schedule #{current_schedule}.")
        return true
      end

      false
    end

    # A heartbeat is due when last_seen is missing or unparsable, or when
    # more than the heartbeat interval has passed since last_seen.
    def heartbeat_triggered?(connector_settings)
      last_seen = connector_settings[:last_seen]
      return true if last_seen.nil? || last_seen.empty?
      last_seen = begin
        Time.parse(last_seen)
      rescue StandardError
        Utility::Logger.warn("Unable to parse last_seen #{last_seen}")
        nil
      end
      return true unless last_seen
      last_seen + @heartbeat_interval < Time.now
    end

    # Configuration is due while the connector is still in the created status.
    def configuration_triggered?(connector_settings)
      connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  # Constant values shared across the connectors code.
  class Constants
    THUMBNAIL_FIELDS = %w[_thumbnail_80x100 _thumbnail_310x430].freeze
    SUBEXTRACTOR_RESERVED_FIELDS = %w[_subextracted_as_of _subextracted_version].freeze
    ALLOW_FIELD = '_allow_permissions'
    DENY_FIELD = '_deny_permissions'
    # Index (alias) name under which connector documents are read and written.
    CONNECTORS_INDEX = '.elastic-connectors'
    # Index (alias) name under which sync job documents are written.
    JOB_INDEX = '.elastic-connectors-sync-jobs'
    # Prefix a connector's index name must start with to be considered valid.
    CONTENT_INDEX_PREFIX = 'search-'
    # service_type value used to select crawler connectors.
    CRAWLER_SERVICE_TYPE = 'elastic-crawler'
  end
end
|