connectors_utility 8.6.0.3 → 8.6.0.4.pre.20221104T201057Z
This diff shows the differences between the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/connector_settings.rb +12 -14
- data/lib/core/elastic_connector_actions.rb +72 -12
- data/lib/core/scheduler.rb +3 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility.rb +1 -0
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75130f5f1747db0119b4619066fa71a9502d82c86838966c9453541704dc5e04
|
4
|
+
data.tar.gz: a1dfebbed9be1639600cce8f35f49800d9a5538e97a4ed57d8674d81796902d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6faaf8c6151c35c0923304cd11692a03bca2cf4f8f0544d38a76e30eb90ec3090e5f8b5a8249d1bd4f4724c0a4712b1ffc58a1d8be1d46fa18e0a1d1f5e2e045
|
7
|
+
data.tar.gz: 57de1647be647f92b9934445034203ed762a453f62e5ce1b4554dff2c1632370eb86bd3b7ff44b2a6380d4ab75f691e8c0c70a39e5caecdfee27f876c475a73f
|
data/lib/connectors/connector_status.rb
CHANGED
@@ -8,11 +8,11 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class ConnectorStatus
|
11
|
-
CREATED
|
11
|
+
CREATED = 'created'
|
12
12
|
NEEDS_CONFIGURATION = 'needs_configuration'
|
13
|
-
CONFIGURED
|
14
|
-
CONNECTED
|
15
|
-
ERROR
|
13
|
+
CONFIGURED = 'configured'
|
14
|
+
CONNECTED = 'connected'
|
15
|
+
ERROR = 'error'
|
16
16
|
|
17
17
|
STATUSES = [
|
18
18
|
CREATED,
|
data/lib/connectors/sync_status.rb
CHANGED
@@ -8,14 +8,33 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class SyncStatus
|
11
|
-
|
11
|
+
PENDING = 'pending'
|
12
12
|
IN_PROGRESS = 'in_progress'
|
13
|
-
|
13
|
+
CANCELING = 'canceling'
|
14
|
+
CANCELED = 'canceled'
|
15
|
+
SUSPENDED = 'suspended'
|
16
|
+
COMPLETED = 'completed'
|
17
|
+
ERROR = 'error'
|
14
18
|
|
15
19
|
STATUSES = [
|
16
|
-
|
20
|
+
PENDING,
|
17
21
|
IN_PROGRESS,
|
18
|
-
|
22
|
+
CANCELING,
|
23
|
+
CANCELED,
|
24
|
+
SUSPENDED,
|
25
|
+
COMPLETED,
|
26
|
+
ERROR
|
27
|
+
]
|
28
|
+
|
29
|
+
PENDING_STATUES = [
|
30
|
+
PENDING,
|
31
|
+
SUSPENDED
|
32
|
+
]
|
33
|
+
|
34
|
+
TERMINAL_STATUSES = [
|
35
|
+
CANCELED,
|
36
|
+
COMPLETED,
|
37
|
+
ERROR
|
19
38
|
]
|
20
39
|
end
|
21
40
|
end
|
data/lib/core/connector_settings.rb
CHANGED
@@ -19,6 +19,8 @@ module Core
|
|
19
19
|
DEFAULT_REDUCE_WHITESPACE = true
|
20
20
|
DEFAULT_RUN_ML_INFERENCE = true
|
21
21
|
|
22
|
+
DEFAULT_FILTERING = {}
|
23
|
+
|
22
24
|
DEFAULT_PAGE_SIZE = 100
|
23
25
|
|
24
26
|
# Error Classes
|
@@ -80,20 +82,24 @@ module Core
|
|
80
82
|
self[:scheduling]
|
81
83
|
end
|
82
84
|
|
85
|
+
def filtering
|
86
|
+
Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
|
87
|
+
end
|
88
|
+
|
83
89
|
def request_pipeline
|
84
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
90
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
85
91
|
end
|
86
92
|
|
87
93
|
def extract_binary_content?
|
88
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
94
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
89
95
|
end
|
90
96
|
|
91
97
|
def reduce_whitespace?
|
92
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
98
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
93
99
|
end
|
94
100
|
|
95
101
|
def run_ml_inference?
|
96
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
102
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
97
103
|
end
|
98
104
|
|
99
105
|
def formatted
|
@@ -110,8 +116,6 @@ module Core
|
|
110
116
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
111
117
|
end
|
112
118
|
|
113
|
-
private
|
114
|
-
|
115
119
|
def self.fetch_connectors_by_query(query, page_size)
|
116
120
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
117
121
|
|
@@ -120,8 +124,8 @@ module Core
|
|
120
124
|
loop do
|
121
125
|
response = ElasticConnectorActions.search_connectors(query, page_size, offset)
|
122
126
|
|
123
|
-
hits = response
|
124
|
-
total = response
|
127
|
+
hits = response.dig('hits', 'hits') || []
|
128
|
+
total = response.dig('hits', 'total', 'value') || 0
|
125
129
|
results += hits.map do |hit|
|
126
130
|
Core::ConnectorSettings.new(hit, connectors_meta)
|
127
131
|
end
|
@@ -132,11 +136,5 @@ module Core
|
|
132
136
|
results
|
133
137
|
end
|
134
138
|
|
135
|
-
def return_if_present(*args)
|
136
|
-
args.each do |arg|
|
137
|
-
return arg unless arg.nil?
|
138
|
-
end
|
139
|
-
nil
|
140
|
-
end
|
141
139
|
end
|
142
140
|
end
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
|
|
10
10
|
require 'connectors/connector_status'
|
11
11
|
require 'connectors/sync_status'
|
12
12
|
require 'utility'
|
13
|
+
require 'elastic-transport'
|
13
14
|
|
14
15
|
module Core
|
16
|
+
class JobAlreadyRunningError < StandardError
|
17
|
+
def initialize(connector_id)
|
18
|
+
super("Sync job for connector '#{connector_id}' is already running.")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class ConnectorVersionChangedError < StandardError
|
23
|
+
def initialize(connector_id, seq_no, primary_term)
|
24
|
+
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
15
28
|
class ElasticConnectorActions
|
16
29
|
class << self
|
17
30
|
|
@@ -72,20 +85,53 @@ module Core
|
|
72
85
|
end
|
73
86
|
|
74
87
|
def claim_job(connector_id)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
88
|
+
seq_no = nil
|
89
|
+
primary_term = nil
|
90
|
+
sync_in_progress = false
|
91
|
+
connector_record = client.get(
|
92
|
+
:index => Utility::Constants::CONNECTORS_INDEX,
|
93
|
+
:id => connector_id,
|
94
|
+
:ignore => 404,
|
95
|
+
:refresh => true
|
96
|
+
).tap do |response|
|
97
|
+
seq_no = response['_seq_no']
|
98
|
+
primary_term = response['_primary_term']
|
99
|
+
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
100
|
+
end
|
101
|
+
if sync_in_progress
|
102
|
+
raise JobAlreadyRunningError.new(connector_id)
|
103
|
+
end
|
104
|
+
update_connector_fields(
|
105
|
+
connector_id,
|
106
|
+
{ :sync_now => false,
|
107
|
+
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
+
:last_synced => Time.now },
|
109
|
+
seq_no,
|
110
|
+
primary_term
|
111
|
+
)
|
79
112
|
|
80
113
|
body = {
|
81
114
|
:connector_id => connector_id,
|
82
115
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
83
116
|
:worker_hostname => Socket.gethostname,
|
84
|
-
:created_at => Time.now
|
117
|
+
:created_at => Time.now,
|
118
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
85
119
|
}
|
86
|
-
job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
87
120
|
|
88
|
-
|
121
|
+
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
122
|
+
end
|
123
|
+
|
124
|
+
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
|
+
return [] unless connector_filtering
|
126
|
+
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
|
+
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
128
|
+
job_filtering << {
|
129
|
+
'domain' => filtering_domain['domain'],
|
130
|
+
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
+
'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
|
132
|
+
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
|
+
}
|
134
|
+
end
|
89
135
|
end
|
90
136
|
|
91
137
|
def update_connector_status(connector_id, status, error_message = nil)
|
@@ -100,7 +146,7 @@ module Core
|
|
100
146
|
end
|
101
147
|
|
102
148
|
def complete_sync(connector_id, job_id, status)
|
103
|
-
sync_status = status[:error] ? Connectors::SyncStatus::
|
149
|
+
sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
104
150
|
|
105
151
|
update_connector_fields(connector_id,
|
106
152
|
:last_sync_status => sync_status,
|
@@ -136,7 +182,7 @@ module Core
|
|
136
182
|
}
|
137
183
|
loop do
|
138
184
|
response = client.search(:body => body)
|
139
|
-
hits = response
|
185
|
+
hits = response.dig('hits', 'hits') || []
|
140
186
|
|
141
187
|
ids = hits.map { |h| h['_id'] }
|
142
188
|
result += ids
|
@@ -242,15 +288,29 @@ module Core
|
|
242
288
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
243
289
|
end
|
244
290
|
|
245
|
-
def update_connector_fields(connector_id, doc = {})
|
291
|
+
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
246
292
|
return if doc.empty?
|
247
|
-
|
293
|
+
update_args = {
|
248
294
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
249
295
|
:id => connector_id,
|
250
296
|
:body => { :doc => doc },
|
251
297
|
:refresh => true,
|
252
298
|
:retry_on_conflict => 3
|
253
|
-
|
299
|
+
}
|
300
|
+
# seq_no and primary_term are used for optimistic concurrency control
|
301
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
302
|
+
if seq_no && primary_term
|
303
|
+
update_args[:if_seq_no] = seq_no
|
304
|
+
update_args[:if_primary_term] = primary_term
|
305
|
+
update_args.delete(:retry_on_conflict)
|
306
|
+
end
|
307
|
+
begin
|
308
|
+
client.update(update_args)
|
309
|
+
rescue Elastic::Transport::Transport::Errors::Conflict
|
310
|
+
# VersionConflictException
|
311
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
312
|
+
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
313
|
+
end
|
254
314
|
end
|
255
315
|
|
256
316
|
private
|
data/lib/core/scheduler.rb
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
require 'time'
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility/cron'
|
13
14
|
require 'utility/logger'
|
14
15
|
require 'utility/exception_tracking'
|
@@ -41,6 +42,8 @@ module Core
|
|
41
42
|
if @is_shutting_down
|
42
43
|
break
|
43
44
|
end
|
45
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
44
47
|
rescue StandardError => e
|
45
48
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
46
49
|
ensure
|
data/lib/utility/errors.rb
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#
|
6
6
|
|
7
7
|
require 'active_support/core_ext/string'
|
8
|
+
require 'elasticsearch'
|
8
9
|
|
9
10
|
module Utility
|
10
11
|
class DocumentError
|
@@ -31,6 +32,8 @@ module Utility
|
|
31
32
|
end
|
32
33
|
|
33
34
|
class ClientError < StandardError; end
|
35
|
+
|
36
|
+
class InvalidFilterConfigError < StandardError; end
|
34
37
|
class EvictionWithNoProgressError < StandardError; end
|
35
38
|
class EvictionError < StandardError
|
36
39
|
attr_accessor :cursors
|
@@ -89,6 +92,7 @@ module Utility
|
|
89
92
|
class InvalidTokenError < StandardError; end
|
90
93
|
class TokenRefreshFailedError < StandardError; end
|
91
94
|
class ConnectorNotAvailableError < StandardError; end
|
95
|
+
class AuthorizationError < StandardError; end
|
92
96
|
|
93
97
|
# For when we want to explicitly set a #cause but can't
|
94
98
|
class ExplicitlyCausedError < StandardError
|
@@ -124,6 +128,7 @@ module Utility
|
|
124
128
|
end
|
125
129
|
end
|
126
130
|
|
131
|
+
AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
|
127
132
|
INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
|
128
133
|
INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
|
129
134
|
UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
|
data/lib/utility/es_client.rb
CHANGED
@@ -20,8 +20,8 @@ module Utility
|
|
20
20
|
attr_reader :cause
|
21
21
|
end
|
22
22
|
|
23
|
-
def initialize(es_config)
|
24
|
-
super(connection_configs(es_config))
|
23
|
+
def initialize(es_config, &block)
|
24
|
+
super(connection_configs(es_config), &block)
|
25
25
|
end
|
26
26
|
|
27
27
|
def connection_configs(es_config)
|
@@ -39,6 +39,10 @@ module Utility
|
|
39
39
|
configs[:log] = es_config[:log] || false
|
40
40
|
configs[:trace] = es_config[:trace] || false
|
41
41
|
|
42
|
+
# transport options
|
43
|
+
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
|
+
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
|
+
|
42
46
|
# if log or trace is activated, we use the application logger
|
43
47
|
configs[:logger] = if configs[:log] || configs[:trace]
|
44
48
|
Utility::Logger.logger
|
data/lib/utility.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.
|
4
|
+
version: 8.6.0.4.pre.20221104T201057Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -125,9 +125,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
125
125
|
licenses:
|
126
126
|
- Elastic-2.0
|
127
127
|
metadata:
|
128
|
-
revision:
|
129
|
-
repository:
|
130
|
-
post_install_message:
|
128
|
+
revision: 2051b3907639a1fe2ae68efdc33c06cf12d38383
|
129
|
+
repository: git@github.com:elastic/ent-search-connectors.git
|
130
|
+
post_install_message:
|
131
131
|
rdoc_options: []
|
132
132
|
require_paths:
|
133
133
|
- lib
|
@@ -138,12 +138,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
138
138
|
version: '0'
|
139
139
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
140
|
requirements:
|
141
|
-
- - "
|
141
|
+
- - ">"
|
142
142
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
143
|
+
version: 1.3.1
|
144
144
|
requirements: []
|
145
145
|
rubygems_version: 3.0.3.1
|
146
|
-
signing_key:
|
146
|
+
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Gem containing shared Connector Services libraries
|
149
149
|
test_files: []
|