connectors_utility 8.4.0.0 → 8.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +26 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors_utility.rb +8 -2
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/scheduler.rb +138 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/elasticsearch/index/mappings.rb +68 -42
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +58 -0
- data/lib/utility.rb +16 -0
- metadata +117 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1aa92692e811978885951049ebd040709d0f286ce4498d93dce54189ff7b8135
|
4
|
+
data.tar.gz: deda3bf664bb03c0b41e75acfa68b4cfa0abce03fd9fd32909c2e0a130686d14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 648d92295a527ccb6541596e030322cd9d7f6d8858edb90981f635f00f83083667e1e29837649b3310226ba180d0dd2316dc6bcb3bd12d53d4ec7c27f0eeb287
|
7
|
+
data.tar.gz: ac6c2c9b5f1bdf0334f0e7379fe0cddcb7a9cf1d32ed6ad7843da9292cd1e5c025cdd31b10f7c45a7a0d5c23969879ddeee9d7938ecc8a13b4ed577696ba0e08
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
|
10
|
+
# String constants naming the statuses a connector document can carry,
# plus two frozen lists built from them.
class ConnectorStatus
  CREATED             = 'created'
  NEEDS_CONFIGURATION = 'needs_configuration'
  CONFIGURED          = 'configured'
  CONNECTED           = 'connected'
  ERROR               = 'error'

  # Every status defined above. Frozen so callers cannot mutate the shared list.
  STATUSES = [
    CREATED,
    NEEDS_CONFIGURATION,
    CONFIGURED,
    CONNECTED,
    ERROR
  ].freeze

  # The subset of statuses listed as allowing a sync. Frozen for the same reason.
  STATUSES_ALLOWING_SYNC = [
    CONFIGURED,
    CONNECTED,
    ERROR
  ].freeze
end
|
31
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/scheduler'
|
10
|
+
require 'core/connector_settings'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility/logger'
|
13
|
+
require 'utility/exception_tracking'
|
14
|
+
|
15
|
+
module Connectors
|
16
|
+
module Crawler
|
17
|
+
# Core::Scheduler subclass whose connector list comes from
# Core::ConnectorSettings.fetch_crawler_connectors.
class Scheduler < Core::Scheduler
  # Returns whatever fetch_crawler_connectors yields, or an empty array when
  # that value is nil/false or when the lookup raises a StandardError
  # (the error is logged through Utility::ExceptionTracking, not re-raised).
  def connector_settings
    begin
      crawler_connectors = Core::ConnectorSettings.fetch_crawler_connectors
    rescue StandardError => error
      Utility::ExceptionTracking.log_exception(error, 'Could not retrieve Crawler connectors due to unexpected error.')
      return []
    end

    crawler_connectors || []
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
|
10
|
+
# String constants naming the states recorded for a sync, plus a frozen
# list of all of them.
class SyncStatus
  COMPLETED   = 'completed'
  IN_PROGRESS = 'in_progress'
  FAILED      = 'failed'

  # Every status defined above. Frozen so callers cannot mutate the shared list.
  STATUSES = [
    COMPLETED,
    IN_PROGRESS,
    FAILED
  ].freeze
end
|
21
|
+
end
|
data/lib/connectors_utility.rb
CHANGED
@@ -6,5 +6,11 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require_relative 'utility
|
10
|
-
|
9
|
+
require_relative 'utility'
|
10
|
+
|
11
|
+
require_relative 'connectors/connector_status'
|
12
|
+
require_relative 'connectors/sync_status'
|
13
|
+
require_relative 'core/scheduler'
|
14
|
+
require_relative 'core/elastic_connector_actions'
|
15
|
+
|
16
|
+
require_relative 'connectors/crawler/scheduler'
|
@@ -0,0 +1,142 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/connector_status'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
|
15
|
+
# Read-only wrapper around one connector document as returned by
# Elasticsearch (a get response or a search hit) together with the `_meta`
# section of the connectors index mapping.
#
# Document fields are read from the response's `_source`; the document id is
# read from the response's `_id`.
class ConnectorSettings
  DEFAULT_REQUEST_PIPELINE = 'ent-search-generic-ingestion'
  DEFAULT_EXTRACT_BINARY_CONTENT = true
  DEFAULT_REDUCE_WHITESPACE = true
  DEFAULT_RUN_ML_INFERENCE = true

  DEFAULT_PAGE_SIZE = 100

  # Error Classes
  class ConnectorNotFoundError < StandardError; end

  # Loads a single connector by id.
  #
  # @param connector_id [String] id of the connector document
  # @return [ConnectorSettings]
  # @raise [ConnectorNotFoundError] when the response's :found flag is falsy
  def self.fetch_by_id(connector_id)
    es_response = ElasticConnectorActions.get_connector(connector_id)
    connectors_meta = ElasticConnectorActions.connectors_meta

    raise ConnectorNotFoundError, "Connector with id=#{connector_id} was not found." unless es_response[:found]

    new(es_response, connectors_meta)
  end

  # Returns all connectors matching `is_native: true`.
  #
  # @param page_size [Integer] number of documents requested per search call
  # @return [Array<ConnectorSettings>]
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
    query = { term: { is_native: true } }
    fetch_connectors_by_query(query, page_size)
  end

  # Returns all connectors whose service_type is the crawler service type.
  #
  # @param page_size [Integer] number of documents requested per search call
  # @return [Array<ConnectorSettings>]
  def self.fetch_crawler_connectors(page_size = DEFAULT_PAGE_SIZE)
    query = { term: { service_type: Utility::Constants::CRAWLER_SERVICE_TYPE } }
    fetch_connectors_by_query(query, page_size)
  end

  # Internal helper: pages through the connectors index and wraps every hit.
  #
  # It is kept above `private` on purpose: `private` does not apply to
  # `def self.` methods, so this method has always been publicly callable and
  # its visibility is left unchanged for existing callers.
  #
  # Paging stops when the collected count reaches the reported total, and
  # also when a page comes back empty or the response carries no hits
  # section, so a stale total or a missing index cannot loop or crash.
  #
  # @param query [Hash] Elasticsearch query body
  # @param page_size [Integer] number of documents requested per search call
  # @return [Array<ConnectorSettings>]
  def self.fetch_connectors_by_query(query, page_size)
    connectors_meta = ElasticConnectorActions.connectors_meta

    results = []
    offset = 0
    loop do
      response = ElasticConnectorActions.search_connectors(query, page_size, offset)

      hits_section = response['hits'] || {}
      hits = hits_section['hits'] || []
      total = hits_section.dig('total', 'value') || 0

      results.concat(hits.map { |hit| new(hit, connectors_meta) })
      break if hits.empty? || results.size >= total

      offset += hits.size
    end

    results
  end

  # @param es_response [Hash] raw Elasticsearch document (get response or search hit)
  # @param connectors_meta [Hash] `_meta` section of the connectors index mapping
  def initialize(es_response, connectors_meta)
    @elasticsearch_response = es_response.with_indifferent_access
    @connectors_meta = connectors_meta.with_indifferent_access
  end

  # @return [String, nil] the `_id` of the wrapped document
  def id
    @elasticsearch_response[:_id]
  end

  # Reads a field from the document's `_source`.
  #
  # @param property_name [Symbol, String]
  # @return [Object, nil] nil when the field (or `_source` itself) is absent
  def [](property_name)
    @elasticsearch_response.dig(:_source, property_name)
  end

  def index_name
    self[:index_name]
  end

  def connector_status
    self[:status]
  end

  # @return [Boolean] whether the current status is one of STATUSES_ALLOWING_SYNC
  def connector_status_allows_sync?
    Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC.include?(connector_status)
  end

  def service_type
    self[:service_type]
  end

  def configuration
    self[:configuration]
  end

  def scheduling_settings
    self[:scheduling]
  end

  # Pipeline accessors: the connector document's own `pipeline` settings win,
  # then the defaults from the index `_meta`, then the constants above.
  def request_pipeline
    pipeline_setting(:name, :default_name, DEFAULT_REQUEST_PIPELINE)
  end

  def extract_binary_content?
    pipeline_setting(:extract_binary_content, :default_extract_binary_content, DEFAULT_EXTRACT_BINARY_CONTENT)
  end

  def reduce_whitespace?
    pipeline_setting(:reduce_whitespace, :default_reduce_whitespace, DEFAULT_REDUCE_WHITESPACE)
  end

  def run_ml_inference?
    pipeline_setting(:run_ml_inference, :default_run_ml_inference, DEFAULT_RUN_ML_INFERENCE)
  end

  # @return [String] short human-readable description for log messages
  def formatted
    properties = ["ID: #{id}"]
    properties << "Service type: #{service_type}" if service_type
    "connector (#{properties.join(', ')})"
  end

  # @return [Boolean] true when service_type is nil or blank
  def needs_service_type?
    service_type.to_s.strip.empty?
  end

  # @return [Boolean, nil] whether index_name starts with the content index
  #   prefix; nil when there is no index_name
  def valid_index_name?
    index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
  end

  private

  # Resolves one pipeline option.
  #
  # The connector's fields live under `_source` (see #[]), so that is read
  # first. The top-level lookup is kept only as a backward-compatible
  # fallback for the location the original code consulted.
  def pipeline_setting(key, meta_default_key, fallback)
    return_if_present(
      @elasticsearch_response.dig(:_source, :pipeline, key),
      @elasticsearch_response.dig(:pipeline, key),
      @connectors_meta.dig(:pipeline, meta_default_key),
      fallback
    )
  end

  # Returns the first non-nil argument (so an explicit `false` is honoured),
  # or nil when every argument is nil.
  def return_if_present(*args)
    args.find { |arg| !arg.nil? }
  end
end
|
142
|
+
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
#
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
require 'connectors/connector_status'
|
11
|
+
require 'connectors/sync_status'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
|
15
|
+
# Class-level helpers that read and write connector and sync-job documents
# through a shared Elasticsearch client.
#
# NOTE(review): `Socket` (claim_job) and `App::Config` (client) are referenced
# here but are not among the requires visible at the top of this file —
# confirm they are loaded elsewhere before this class is used.
class ElasticConnectorActions
  class << self
    # Enables scheduling and sets the sync_now flag on the connector.
    def force_sync(connector_id)
      update_connector_fields(connector_id, :scheduling => { :enabled => true }, :sync_now => true)
    end

    # Indexes a new connector document with scheduling enabled.
    #
    # @return [String] the `_id` of the created document
    def create_connector(index_name, service_type)
      body = {
        :scheduling => { :enabled => true },
        :index_name => index_name,
        :service_type => service_type
      }
      response = client.index(:index => Utility::Constants::CONNECTORS_INDEX, :body => body)
      response['_id']
    end

    # Fetches the connector document; a 404 is ignored rather than raised.
    #
    # @return [Hash] the response with indifferent access
    def get_connector(connector_id)
      client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
    end

    # Reads the `_meta` section of the mapping of the highest-versioned index
    # found behind the connectors index name.
    #
    # @return [Hash] the `_meta` hash, or an empty hash when there is none
    def connectors_meta
      alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
      index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
      alias_mappings.dig(index, 'mappings', '_meta') || {}
    end

    # Runs one page of a connector search, sorted by name; a 404 is ignored.
    #
    # @param query [Hash] Elasticsearch query
    # @param page_size [Integer] value sent as `size`
    # @param offset [Integer] value sent as `from`
    def search_connectors(query, page_size, offset)
      client.search(
        :index => Utility::Constants::CONNECTORS_INDEX,
        :ignore => 404,
        :body => {
          :size => page_size,
          :from => offset,
          :query => query,
          :sort => ['name']
        }
      )
    end

    def update_connector_configuration(connector_id, configuration)
      update_connector_fields(connector_id, :configuration => configuration)
    end

    def enable_connector_scheduling(connector_id, cron_expression)
      payload = { :enabled => true, :interval => cron_expression }
      update_connector_fields(connector_id, :scheduling => payload)
    end

    def disable_connector_scheduling(connector_id)
      payload = { :enabled => false }
      update_connector_fields(connector_id, :scheduling => payload)
    end

    # Writes a single configuration entry of the form { value:, label: }.
    def set_configurable_field(connector_id, field_name, label, value)
      payload = { field_name => { :value => value, :label => label } }
      update_connector_configuration(connector_id, payload)
    end

    # Marks the connector as syncing (clearing sync_now) and indexes a job
    # document describing the sync.
    #
    # @return [String] the `_id` of the created job document
    def claim_job(connector_id)
      update_connector_fields(connector_id,
                              :sync_now => false,
                              :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
                              :last_synced => Time.now)

      body = {
        :connector_id => connector_id,
        :status => Connectors::SyncStatus::IN_PROGRESS,
        :worker_hostname => Socket.gethostname,
        :created_at => Time.now
      }
      job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)

      job['_id']
    end

    # Updates the connector status; the error field is set only for the error
    # status and cleared (nil) otherwise.
    #
    # @raise [ArgumentError] when status is error and no error_message is given
    def update_connector_status(connector_id, status, error_message = nil)
      is_error = status == Connectors::ConnectorStatus::ERROR
      raise ArgumentError, 'error_message is required when status is error' if is_error && error_message.nil?

      body = {
        :status => status,
        :error => is_error ? error_message : nil
      }
      update_connector_fields(connector_id, body)
    end

    # Records the outcome of a sync on both the connector and the job document.
    # The sync counts as failed when status[:error] is truthy.
    #
    # @param status [Hash] read for :error, :indexed_document_count and
    #   :deleted_document_count; also merged as-is into the job document update
    def complete_sync(connector_id, job_id, status)
      sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED

      update_connector_fields(connector_id,
                              :last_sync_status => sync_status,
                              :last_sync_error => status[:error],
                              :error => status[:error],
                              :last_synced => Time.now,
                              :last_indexed_document_count => status[:indexed_document_count],
                              :last_deleted_document_count => status[:deleted_document_count])

      body = {
        :doc => {
          :status => sync_status,
          :completed_at => Time.now
        }.merge(status)
      }
      client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
    end

    # Collects the `_id` of every document in the index by paging through a
    # point-in-time search in pages of 1000.
    #
    # The point in time is closed in `ensure`, but only if it was actually
    # opened: closing with a nil id after a failed open would raise a second
    # error that hides the original one.
    #
    # @return [Array<String>]
    def fetch_document_ids(index_name)
      page_size = 1000
      result = []
      pit_id = nil
      begin
        pit_id = client.open_point_in_time(:index => index_name, :keep_alive => '1m', :expand_wildcards => 'all')['id']
        body = {
          :query => { :match_all => {} },
          :sort => [{ :id => { :order => :asc } }],
          :pit => {
            :id => pit_id,
            :keep_alive => '1m'
          },
          :size => page_size,
          :_source => false
        }
        loop do
          response = client.search(:body => body)
          hits = response['hits']['hits']

          # Remember the most recently returned id so that both the next page
          # and the final close use it.
          pit_id = response['pit_id'] || pit_id

          result.concat(hits.map { |h| h['_id'] })
          break if hits.size < page_size

          body[:search_after] = hits.last['sort']
          body[:pit][:id] = pit_id
        end
      ensure
        client.close_point_in_time(:index => index_name, :body => { :id => pit_id }) if pit_id
      end

      result
    end

    # Ensures the content index exists with text-analysis settings and the
    # default text field mappings.
    def ensure_content_index_exists(index_name, use_icu_locale = false, language_code = nil)
      settings = Utility::Elasticsearch::Index::TextAnalysisSettings.new(:language_code => language_code, :analysis_icu => use_icu_locale).to_h
      mappings = Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(:connectors_index => true)

      body_payload = { settings: settings, mappings: mappings }
      ensure_index_exists(index_name, body_payload)
    end

    # Creates the index with the given body when it does not exist. When it
    # does exist and body[:mappings] is given, mappings are added only if the
    # index currently has none; existing mappings are left untouched.
    def ensure_index_exists(index_name, body = {})
      if client.indices.exists?(:index => index_name)
        return unless body[:mappings]

        Utility::Logger.debug("Index #{index_name} already exists. Checking mappings...")
        Utility::Logger.debug("New mappings: #{body[:mappings]}")
        response = client.indices.get_mapping(:index => index_name)
        existing = response[index_name]['mappings']
        if existing.empty?
          Utility::Logger.debug("Index #{index_name} has no mappings. Adding mappings...")
          client.indices.put_mapping(:index => index_name, :body => body[:mappings], :expand_wildcards => 'all')
          Utility::Logger.debug("Index #{index_name} mappings added.")
        else
          Utility::Logger.debug("Index #{index_name} already has mappings: #{existing}. Skipping...")
        end
      else
        client.indices.create(:index => index_name, :body => body)
        Utility::Logger.debug("Created index #{index_name}")
      end
    end

    # Builds the creation body for a hidden index.
    #
    # @param alias_name [String, nil] when non-empty, added as a write alias
    # @param mappings [Hash, nil] when non-nil, added as the index mappings
    # @return [Hash]
    def system_index_body(alias_name: nil, mappings: nil)
      body = {
        :settings => {
          :index => {
            :hidden => true,
            :number_of_replicas => 0,
            :auto_expand_replicas => '0-5'
          }
        }
      }
      body[:aliases] = { alias_name => { :is_write_index => true } } unless alias_name.nil? || alias_name.empty?
      body[:mappings] = mappings unless mappings.nil?
      body
    end

    # DO NOT USE this method
    # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
    def ensure_connectors_index_exists
      mappings = {
        :properties => {
          :api_key_id => { :type => :keyword },
          :configuration => { :type => :object },
          :error => { :type => :text },
          :index_name => { :type => :keyword },
          :last_seen => { :type => :date },
          :last_synced => { :type => :date },
          :last_indexed_document_count => { :type => :integer },
          :last_deleted_document_count => { :type => :integer },
          :scheduling => {
            :properties => {
              :enabled => { :type => :boolean },
              :interval => { :type => :text }
            }
          },
          :service_type => { :type => :keyword },
          :status => { :type => :keyword },
          :sync_error => { :type => :text },
          :sync_now => { :type => :boolean },
          :sync_status => { :type => :keyword }
        }
      }
      ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
    end

    # DO NOT USE this method
    # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
    def ensure_job_index_exists
      mappings = {
        :properties => {
          :connector_id => { :type => :keyword },
          :status => { :type => :keyword },
          :error => { :type => :text },
          :worker_hostname => { :type => :keyword },
          :indexed_document_count => { :type => :integer },
          :deleted_document_count => { :type => :integer },
          :created_at => { :type => :date },
          :completed_at => { :type => :date }
        }
      }
      ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
    end

    # Partially updates the connector document with `doc`, refreshing the
    # index and retrying up to 3 times on version conflicts. Does nothing
    # (returns nil) when `doc` is empty.
    def update_connector_fields(connector_id, doc = {})
      return if doc.empty?

      client.update(
        :index => Utility::Constants::CONNECTORS_INDEX,
        :id => connector_id,
        :body => { :doc => doc },
        :refresh => true,
        :retry_on_conflict => 3
      )
    end

    private

    # Memoized Elasticsearch client built from App::Config[:elasticsearch].
    def client
      @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
    end

    # Given index names of the form "<alias_name>-v<N>", returns the name with
    # the largest N. Names without that prefix count as version 0.
    def get_latest_index_in_alias(alias_name, indices)
      prefix = "#{alias_name}-v"
      latest_version = indices.map { |index| index.delete_prefix(prefix).to_i }.max
      "#{prefix}#{latest_version}"
    end
  end
end
|
269
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'fugit'
|
11
|
+
require 'core/connector_settings'
|
12
|
+
require 'utility/cron'
|
13
|
+
require 'utility/logger'
|
14
|
+
require 'utility/exception_tracking'
|
15
|
+
|
16
|
+
module Core
|
17
|
+
# Polling loop that inspects every connector returned by #connector_settings
# and yields the tasks (:sync, :heartbeat, :configuration) that are due.
# Subclasses are expected to override #connector_settings.
class Scheduler
  # @param poll_interval [Numeric] seconds slept between polling rounds
  # @param heartbeat_interval [Numeric] seconds after which last_seen is considered stale
  def initialize(poll_interval, heartbeat_interval)
    @is_shutting_down = false
    @heartbeat_interval = heartbeat_interval
    @poll_interval = poll_interval
  end

  # Base implementation: always raises a RuntimeError ('Not implemented').
  def connector_settings
    raise 'Not implemented'
  end

  # Runs polling rounds until #shutdown has been called.
  #
  # Each round yields (connector, task) for every due task. A StandardError
  # raised anywhere in a round is logged and ends that round; the loop then
  # continues with the next round after the poll-interval sleep.
  def when_triggered
    loop do
      connector_settings.each do |cs|
        yield cs, :sync if sync_triggered?(cs)
        yield cs, :heartbeat if heartbeat_triggered?(cs)
        yield cs, :configuration if configuration_triggered?(cs)
      end
      break if @is_shutting_down
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
    ensure
      pause_before_next_round
    end
  end

  # Flags the scheduler so that the loop in #when_triggered stops after the
  # current round.
  def shutdown
    Utility::Logger.info("Shutting down scheduler #{self.class.name}.")
    @is_shutting_down = true
  end

  private

  # Sleeps for the poll interval, unless the interval is not positive or the
  # scheduler is shutting down.
  def pause_before_next_round
    return unless @poll_interval > 0
    return if @is_shutting_down

    Utility::Logger.info("Sleeping for #{@poll_interval} seconds in #{self.class}.")
    sleep(@poll_interval)
  end

  # Decides whether a sync is due. Checks, in order: connector status, the
  # sync_now flag, whether scheduling is enabled, whether a sync ever
  # happened, and finally the cron schedule relative to last_synced.
  def sync_triggered?(connector_settings)
    unless connector_settings.connector_status_allows_sync?
      Utility::Logger.info("Connector #{connector_settings.id} is in status \"#{connector_settings.connector_status}\" and won't sync yet. Connector needs to be in one of the following statuses: #{Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC} to run.")

      return false
    end

    # A manual trigger wins over every scheduling consideration
    if connector_settings[:sync_now] == true
      Utility::Logger.info("Connector #{connector_settings.id} is manually triggered to sync now.")
      return true
    end

    # Scheduling has to be present and explicitly enabled
    schedule = connector_settings.scheduling_settings
    unless schedule.present? && schedule[:enabled] == true
      Utility::Logger.info("Connector #{connector_settings.id} scheduling is disabled.")
      return false
    end

    # A connector that never synced gets its initial sync right away
    synced_at = connector_settings[:last_synced]
    if synced_at.nil? || synced_at.empty?
      Utility::Logger.info("Connector #{connector_settings.id} has never synced yet, running initial sync.")
      return true
    end

    # Without an interval there is nothing to schedule
    quartz_expression = schedule[:interval]
    if quartz_expression.nil? || quartz_expression.empty?
      Utility::Logger.warn("No sync schedule configured for connector #{connector_settings.id}.")
      return false
    end

    begin
      crontab_expression = Utility::Cron.quartz_to_crontab(quartz_expression)
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{quartz_expression}) to crontab.")
      return false
    end

    # Fugit returns a falsy value for an expression it cannot parse
    cron = Fugit::Cron.parse(crontab_expression)
    unless cron
      Utility::Logger.error("Unable to parse sync schedule for connector #{connector_settings.id}: expression #{crontab_expression} is not a valid Quartz Cron definition.")
      return false
    end

    # Due when the first scheduled time after the last sync is already past
    next_due = cron.next_time(Time.parse(synced_at))
    return false unless next_due < Time.now

    Utility::Logger.info("Connector #{connector_settings.id} sync is triggered by cron schedule #{crontab_expression}.")
    true
  end

  # A heartbeat is due when last_seen is missing, empty, unparsable, or older
  # than the heartbeat interval.
  def heartbeat_triggered?(connector_settings)
    raw_last_seen = connector_settings[:last_seen]
    return true if raw_last_seen.nil? || raw_last_seen.empty?

    begin
      seen_at = Time.parse(raw_last_seen)
    rescue StandardError
      Utility::Logger.warn("Unable to parse last_seen #{raw_last_seen}")
      return true
    end
    return true unless seen_at

    stale_after = seen_at + @heartbeat_interval
    stale_after < Time.now
  end

  # Configuration is due while the connector is still in the created status.
  def configuration_triggered?(connector_settings)
    Connectors::ConnectorStatus::CREATED == connector_settings.connector_status
  end
end
|
138
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
# Shared literal values: reserved document field names, index names and
# well-known identifiers.
class Constants
  # Reserved document fields
  THUMBNAIL_FIELDS = ['_thumbnail_80x100', '_thumbnail_310x430'].freeze
  SUBEXTRACTOR_RESERVED_FIELDS = ['_subextracted_as_of', '_subextracted_version'].freeze

  # Permission fields
  ALLOW_FIELD = '_allow_permissions'
  DENY_FIELD = '_deny_permissions'

  # Index names and prefix
  CONNECTORS_INDEX = '.elastic-connectors'
  JOB_INDEX = '.elastic-connectors-sync-jobs'
  CONTENT_INDEX_PREFIX = 'search-'

  # Service types
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
end
|
20
|
+
end
|