connectors_utility 8.6.0.3 → 8.6.0.4.pre.20221104T201057Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/connector_settings.rb +12 -14
- data/lib/core/elastic_connector_actions.rb +72 -12
- data/lib/core/scheduler.rb +3 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility.rb +1 -0
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75130f5f1747db0119b4619066fa71a9502d82c86838966c9453541704dc5e04
|
4
|
+
data.tar.gz: a1dfebbed9be1639600cce8f35f49800d9a5538e97a4ed57d8674d81796902d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6faaf8c6151c35c0923304cd11692a03bca2cf4f8f0544d38a76e30eb90ec3090e5f8b5a8249d1bd4f4724c0a4712b1ffc58a1d8be1d46fa18e0a1d1f5e2e045
|
7
|
+
data.tar.gz: 57de1647be647f92b9934445034203ed762a453f62e5ce1b4554dff2c1632370eb86bd3b7ff44b2a6380d4ab75f691e8c0c70a39e5caecdfee27f876c475a73f
|
data/lib/connectors/connector_status.rb
CHANGED
@@ -8,11 +8,11 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class ConnectorStatus
|
11
|
-
CREATED
|
11
|
+
CREATED = 'created'
|
12
12
|
NEEDS_CONFIGURATION = 'needs_configuration'
|
13
|
-
CONFIGURED
|
14
|
-
CONNECTED
|
15
|
-
ERROR
|
13
|
+
CONFIGURED = 'configured'
|
14
|
+
CONNECTED = 'connected'
|
15
|
+
ERROR = 'error'
|
16
16
|
|
17
17
|
STATUSES = [
|
18
18
|
CREATED,
|
data/lib/connectors/sync_status.rb
CHANGED
@@ -8,14 +8,33 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class SyncStatus
|
11
|
-
|
11
|
+
PENDING = 'pending'
|
12
12
|
IN_PROGRESS = 'in_progress'
|
13
|
-
|
13
|
+
CANCELING = 'canceling'
|
14
|
+
CANCELED = 'canceled'
|
15
|
+
SUSPENDED = 'suspended'
|
16
|
+
COMPLETED = 'completed'
|
17
|
+
ERROR = 'error'
|
14
18
|
|
15
19
|
STATUSES = [
|
16
|
-
|
20
|
+
PENDING,
|
17
21
|
IN_PROGRESS,
|
18
|
-
|
22
|
+
CANCELING,
|
23
|
+
CANCELED,
|
24
|
+
SUSPENDED,
|
25
|
+
COMPLETED,
|
26
|
+
ERROR
|
27
|
+
]
|
28
|
+
|
29
|
+
PENDING_STATUES = [
|
30
|
+
PENDING,
|
31
|
+
SUSPENDED
|
32
|
+
]
|
33
|
+
|
34
|
+
TERMINAL_STATUSES = [
|
35
|
+
CANCELED,
|
36
|
+
COMPLETED,
|
37
|
+
ERROR
|
19
38
|
]
|
20
39
|
end
|
21
40
|
end
|
data/lib/core/connector_settings.rb
CHANGED
@@ -19,6 +19,8 @@ module Core
|
|
19
19
|
DEFAULT_REDUCE_WHITESPACE = true
|
20
20
|
DEFAULT_RUN_ML_INFERENCE = true
|
21
21
|
|
22
|
+
DEFAULT_FILTERING = {}
|
23
|
+
|
22
24
|
DEFAULT_PAGE_SIZE = 100
|
23
25
|
|
24
26
|
# Error Classes
|
@@ -80,20 +82,24 @@ module Core
|
|
80
82
|
self[:scheduling]
|
81
83
|
end
|
82
84
|
|
85
|
+
def filtering
|
86
|
+
Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
|
87
|
+
end
|
88
|
+
|
83
89
|
def request_pipeline
|
84
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
90
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
85
91
|
end
|
86
92
|
|
87
93
|
def extract_binary_content?
|
88
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
94
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
89
95
|
end
|
90
96
|
|
91
97
|
def reduce_whitespace?
|
92
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
98
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
93
99
|
end
|
94
100
|
|
95
101
|
def run_ml_inference?
|
96
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
102
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
97
103
|
end
|
98
104
|
|
99
105
|
def formatted
|
@@ -110,8 +116,6 @@ module Core
|
|
110
116
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
111
117
|
end
|
112
118
|
|
113
|
-
private
|
114
|
-
|
115
119
|
def self.fetch_connectors_by_query(query, page_size)
|
116
120
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
117
121
|
|
@@ -120,8 +124,8 @@ module Core
|
|
120
124
|
loop do
|
121
125
|
response = ElasticConnectorActions.search_connectors(query, page_size, offset)
|
122
126
|
|
123
|
-
hits = response
|
124
|
-
total = response
|
127
|
+
hits = response.dig('hits', 'hits') || []
|
128
|
+
total = response.dig('hits', 'total', 'value') || 0
|
125
129
|
results += hits.map do |hit|
|
126
130
|
Core::ConnectorSettings.new(hit, connectors_meta)
|
127
131
|
end
|
@@ -132,11 +136,5 @@ module Core
|
|
132
136
|
results
|
133
137
|
end
|
134
138
|
|
135
|
-
def return_if_present(*args)
|
136
|
-
args.each do |arg|
|
137
|
-
return arg unless arg.nil?
|
138
|
-
end
|
139
|
-
nil
|
140
|
-
end
|
141
139
|
end
|
142
140
|
end
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
|
|
10
10
|
require 'connectors/connector_status'
|
11
11
|
require 'connectors/sync_status'
|
12
12
|
require 'utility'
|
13
|
+
require 'elastic-transport'
|
13
14
|
|
14
15
|
module Core
|
16
|
+
class JobAlreadyRunningError < StandardError
|
17
|
+
def initialize(connector_id)
|
18
|
+
super("Sync job for connector '#{connector_id}' is already running.")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class ConnectorVersionChangedError < StandardError
|
23
|
+
def initialize(connector_id, seq_no, primary_term)
|
24
|
+
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
15
28
|
class ElasticConnectorActions
|
16
29
|
class << self
|
17
30
|
|
@@ -72,20 +85,53 @@ module Core
|
|
72
85
|
end
|
73
86
|
|
74
87
|
def claim_job(connector_id)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
88
|
+
seq_no = nil
|
89
|
+
primary_term = nil
|
90
|
+
sync_in_progress = false
|
91
|
+
connector_record = client.get(
|
92
|
+
:index => Utility::Constants::CONNECTORS_INDEX,
|
93
|
+
:id => connector_id,
|
94
|
+
:ignore => 404,
|
95
|
+
:refresh => true
|
96
|
+
).tap do |response|
|
97
|
+
seq_no = response['_seq_no']
|
98
|
+
primary_term = response['_primary_term']
|
99
|
+
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
100
|
+
end
|
101
|
+
if sync_in_progress
|
102
|
+
raise JobAlreadyRunningError.new(connector_id)
|
103
|
+
end
|
104
|
+
update_connector_fields(
|
105
|
+
connector_id,
|
106
|
+
{ :sync_now => false,
|
107
|
+
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
+
:last_synced => Time.now },
|
109
|
+
seq_no,
|
110
|
+
primary_term
|
111
|
+
)
|
79
112
|
|
80
113
|
body = {
|
81
114
|
:connector_id => connector_id,
|
82
115
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
83
116
|
:worker_hostname => Socket.gethostname,
|
84
|
-
:created_at => Time.now
|
117
|
+
:created_at => Time.now,
|
118
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
85
119
|
}
|
86
|
-
job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
87
120
|
|
88
|
-
|
121
|
+
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
122
|
+
end
|
123
|
+
|
124
|
+
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
|
+
return [] unless connector_filtering
|
126
|
+
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
|
+
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
128
|
+
job_filtering << {
|
129
|
+
'domain' => filtering_domain['domain'],
|
130
|
+
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
+
'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
|
132
|
+
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
|
+
}
|
134
|
+
end
|
89
135
|
end
|
90
136
|
|
91
137
|
def update_connector_status(connector_id, status, error_message = nil)
|
@@ -100,7 +146,7 @@ module Core
|
|
100
146
|
end
|
101
147
|
|
102
148
|
def complete_sync(connector_id, job_id, status)
|
103
|
-
sync_status = status[:error] ? Connectors::SyncStatus::
|
149
|
+
sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
104
150
|
|
105
151
|
update_connector_fields(connector_id,
|
106
152
|
:last_sync_status => sync_status,
|
@@ -136,7 +182,7 @@ module Core
|
|
136
182
|
}
|
137
183
|
loop do
|
138
184
|
response = client.search(:body => body)
|
139
|
-
hits = response
|
185
|
+
hits = response.dig('hits', 'hits') || []
|
140
186
|
|
141
187
|
ids = hits.map { |h| h['_id'] }
|
142
188
|
result += ids
|
@@ -242,15 +288,29 @@ module Core
|
|
242
288
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
243
289
|
end
|
244
290
|
|
245
|
-
def update_connector_fields(connector_id, doc = {})
|
291
|
+
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
246
292
|
return if doc.empty?
|
247
|
-
|
293
|
+
update_args = {
|
248
294
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
249
295
|
:id => connector_id,
|
250
296
|
:body => { :doc => doc },
|
251
297
|
:refresh => true,
|
252
298
|
:retry_on_conflict => 3
|
253
|
-
|
299
|
+
}
|
300
|
+
# seq_no and primary_term are used for optimistic concurrency control
|
301
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
302
|
+
if seq_no && primary_term
|
303
|
+
update_args[:if_seq_no] = seq_no
|
304
|
+
update_args[:if_primary_term] = primary_term
|
305
|
+
update_args.delete(:retry_on_conflict)
|
306
|
+
end
|
307
|
+
begin
|
308
|
+
client.update(update_args)
|
309
|
+
rescue Elastic::Transport::Transport::Errors::Conflict
|
310
|
+
# VersionConflictException
|
311
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
312
|
+
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
313
|
+
end
|
254
314
|
end
|
255
315
|
|
256
316
|
private
|
data/lib/core/scheduler.rb
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
require 'time'
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility/cron'
|
13
14
|
require 'utility/logger'
|
14
15
|
require 'utility/exception_tracking'
|
@@ -41,6 +42,8 @@ module Core
|
|
41
42
|
if @is_shutting_down
|
42
43
|
break
|
43
44
|
end
|
45
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
44
47
|
rescue StandardError => e
|
45
48
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
46
49
|
ensure
|
data/lib/utility/errors.rb
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#
|
6
6
|
|
7
7
|
require 'active_support/core_ext/string'
|
8
|
+
require 'elasticsearch'
|
8
9
|
|
9
10
|
module Utility
|
10
11
|
class DocumentError
|
@@ -31,6 +32,8 @@ module Utility
|
|
31
32
|
end
|
32
33
|
|
33
34
|
class ClientError < StandardError; end
|
35
|
+
|
36
|
+
class InvalidFilterConfigError < StandardError; end
|
34
37
|
class EvictionWithNoProgressError < StandardError; end
|
35
38
|
class EvictionError < StandardError
|
36
39
|
attr_accessor :cursors
|
@@ -89,6 +92,7 @@ module Utility
|
|
89
92
|
class InvalidTokenError < StandardError; end
|
90
93
|
class TokenRefreshFailedError < StandardError; end
|
91
94
|
class ConnectorNotAvailableError < StandardError; end
|
95
|
+
class AuthorizationError < StandardError; end
|
92
96
|
|
93
97
|
# For when we want to explicitly set a #cause but can't
|
94
98
|
class ExplicitlyCausedError < StandardError
|
@@ -124,6 +128,7 @@ module Utility
|
|
124
128
|
end
|
125
129
|
end
|
126
130
|
|
131
|
+
AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
|
127
132
|
INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
|
128
133
|
INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
|
129
134
|
UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
|
data/lib/utility/es_client.rb
CHANGED
@@ -20,8 +20,8 @@ module Utility
|
|
20
20
|
attr_reader :cause
|
21
21
|
end
|
22
22
|
|
23
|
-
def initialize(es_config)
|
24
|
-
super(connection_configs(es_config))
|
23
|
+
def initialize(es_config, &block)
|
24
|
+
super(connection_configs(es_config), &block)
|
25
25
|
end
|
26
26
|
|
27
27
|
def connection_configs(es_config)
|
@@ -39,6 +39,10 @@ module Utility
|
|
39
39
|
configs[:log] = es_config[:log] || false
|
40
40
|
configs[:trace] = es_config[:trace] || false
|
41
41
|
|
42
|
+
# transport options
|
43
|
+
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
|
+
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
|
+
|
42
46
|
# if log or trace is activated, we use the application logger
|
43
47
|
configs[:logger] = if configs[:log] || configs[:trace]
|
44
48
|
Utility::Logger.logger
|
data/lib/utility.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.3
|
4
|
+
version: 8.6.0.4.pre.20221104T201057Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -125,9 +125,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
125
125
|
licenses:
|
126
126
|
- Elastic-2.0
|
127
127
|
metadata:
|
128
|
-
revision:
|
129
|
-
repository:
|
130
|
-
post_install_message:
|
128
|
+
revision: 2051b3907639a1fe2ae68efdc33c06cf12d38383
|
129
|
+
repository: git@github.com:elastic/ent-search-connectors.git
|
130
|
+
post_install_message:
|
131
131
|
rdoc_options: []
|
132
132
|
require_paths:
|
133
133
|
- lib
|
@@ -138,12 +138,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
138
138
|
version: '0'
|
139
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
140
|
requirements:
|
141
|
-
- - "
|
141
|
+
- - ">"
|
142
142
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
143
|
+
version: 1.3.1
|
144
144
|
requirements: []
|
145
145
|
rubygems_version: 3.0.3.1
|
146
|
-
signing_key:
|
146
|
+
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Gem containing shared Connector Services libraries
|
149
149
|
test_files: []
|