connectors_utility 8.6.0.3 → 8.6.0.4.pre.20221104T202636Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/connector_settings.rb +12 -14
- data/lib/core/elastic_connector_actions.rb +72 -12
- data/lib/core/scheduler.rb +3 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility.rb +1 -0
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4532477e27d803b5c9933b26b714979aa2d6f99443e1e0c40a84abc46709f8d9
|
4
|
+
data.tar.gz: 7c568080ab4dcdc479f214bfda177cabf12475dbf33ca329d2d5fdf721de42c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31865ecfc46198f5a6b8649e22f369573b766799eb177e28f25ae7a269384c63165ce7ffd2b9f41c4644d7d4e238c063237881e10e9e699ad02a220a94c41ab6
|
7
|
+
data.tar.gz: 246e0f453c9cffd5566d09d8370eecce5d4c65739ea84832350f2b9e21d2192aa560ad9b12495f9ad3027bc0b44d0f28315775292240ee3431f4c32f65d05b69
|
data/lib/connectors/connector_status.rb
CHANGED
@@ -8,11 +8,11 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class ConnectorStatus
|
11
|
-
CREATED
|
11
|
+
CREATED = 'created'
|
12
12
|
NEEDS_CONFIGURATION = 'needs_configuration'
|
13
|
-
CONFIGURED
|
14
|
-
CONNECTED
|
15
|
-
ERROR
|
13
|
+
CONFIGURED = 'configured'
|
14
|
+
CONNECTED = 'connected'
|
15
|
+
ERROR = 'error'
|
16
16
|
|
17
17
|
STATUSES = [
|
18
18
|
CREATED,
|
data/lib/connectors/sync_status.rb
CHANGED
@@ -8,14 +8,33 @@
|
|
8
8
|
|
9
9
|
module Connectors
|
10
10
|
class SyncStatus
|
11
|
-
|
11
|
+
PENDING = 'pending'
|
12
12
|
IN_PROGRESS = 'in_progress'
|
13
|
-
|
13
|
+
CANCELING = 'canceling'
|
14
|
+
CANCELED = 'canceled'
|
15
|
+
SUSPENDED = 'suspended'
|
16
|
+
COMPLETED = 'completed'
|
17
|
+
ERROR = 'error'
|
14
18
|
|
15
19
|
STATUSES = [
|
16
|
-
|
20
|
+
PENDING,
|
17
21
|
IN_PROGRESS,
|
18
|
-
|
22
|
+
CANCELING,
|
23
|
+
CANCELED,
|
24
|
+
SUSPENDED,
|
25
|
+
COMPLETED,
|
26
|
+
ERROR
|
27
|
+
]
|
28
|
+
|
29
|
+
PENDING_STATUES = [
|
30
|
+
PENDING,
|
31
|
+
SUSPENDED
|
32
|
+
]
|
33
|
+
|
34
|
+
TERMINAL_STATUSES = [
|
35
|
+
CANCELED,
|
36
|
+
COMPLETED,
|
37
|
+
ERROR
|
19
38
|
]
|
20
39
|
end
|
21
40
|
end
|
data/lib/core/connector_settings.rb
CHANGED
@@ -19,6 +19,8 @@ module Core
|
|
19
19
|
DEFAULT_REDUCE_WHITESPACE = true
|
20
20
|
DEFAULT_RUN_ML_INFERENCE = true
|
21
21
|
|
22
|
+
DEFAULT_FILTERING = {}
|
23
|
+
|
22
24
|
DEFAULT_PAGE_SIZE = 100
|
23
25
|
|
24
26
|
# Error Classes
|
@@ -80,20 +82,24 @@ module Core
|
|
80
82
|
self[:scheduling]
|
81
83
|
end
|
82
84
|
|
85
|
+
def filtering
|
86
|
+
Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
|
87
|
+
end
|
88
|
+
|
83
89
|
def request_pipeline
|
84
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
90
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
85
91
|
end
|
86
92
|
|
87
93
|
def extract_binary_content?
|
88
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
94
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
89
95
|
end
|
90
96
|
|
91
97
|
def reduce_whitespace?
|
92
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
98
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
93
99
|
end
|
94
100
|
|
95
101
|
def run_ml_inference?
|
96
|
-
return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
102
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
97
103
|
end
|
98
104
|
|
99
105
|
def formatted
|
@@ -110,8 +116,6 @@ module Core
|
|
110
116
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
111
117
|
end
|
112
118
|
|
113
|
-
private
|
114
|
-
|
115
119
|
def self.fetch_connectors_by_query(query, page_size)
|
116
120
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
117
121
|
|
@@ -120,8 +124,8 @@ module Core
|
|
120
124
|
loop do
|
121
125
|
response = ElasticConnectorActions.search_connectors(query, page_size, offset)
|
122
126
|
|
123
|
-
hits = response
|
124
|
-
total = response
|
127
|
+
hits = response.dig('hits', 'hits') || []
|
128
|
+
total = response.dig('hits', 'total', 'value') || 0
|
125
129
|
results += hits.map do |hit|
|
126
130
|
Core::ConnectorSettings.new(hit, connectors_meta)
|
127
131
|
end
|
@@ -132,11 +136,5 @@ module Core
|
|
132
136
|
results
|
133
137
|
end
|
134
138
|
|
135
|
-
def return_if_present(*args)
|
136
|
-
args.each do |arg|
|
137
|
-
return arg unless arg.nil?
|
138
|
-
end
|
139
|
-
nil
|
140
|
-
end
|
141
139
|
end
|
142
140
|
end
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
|
|
10
10
|
require 'connectors/connector_status'
|
11
11
|
require 'connectors/sync_status'
|
12
12
|
require 'utility'
|
13
|
+
require 'elastic-transport'
|
13
14
|
|
14
15
|
module Core
|
16
|
+
class JobAlreadyRunningError < StandardError
|
17
|
+
def initialize(connector_id)
|
18
|
+
super("Sync job for connector '#{connector_id}' is already running.")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class ConnectorVersionChangedError < StandardError
|
23
|
+
def initialize(connector_id, seq_no, primary_term)
|
24
|
+
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
15
28
|
class ElasticConnectorActions
|
16
29
|
class << self
|
17
30
|
|
@@ -72,20 +85,53 @@ module Core
|
|
72
85
|
end
|
73
86
|
|
74
87
|
def claim_job(connector_id)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
88
|
+
seq_no = nil
|
89
|
+
primary_term = nil
|
90
|
+
sync_in_progress = false
|
91
|
+
connector_record = client.get(
|
92
|
+
:index => Utility::Constants::CONNECTORS_INDEX,
|
93
|
+
:id => connector_id,
|
94
|
+
:ignore => 404,
|
95
|
+
:refresh => true
|
96
|
+
).tap do |response|
|
97
|
+
seq_no = response['_seq_no']
|
98
|
+
primary_term = response['_primary_term']
|
99
|
+
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
100
|
+
end
|
101
|
+
if sync_in_progress
|
102
|
+
raise JobAlreadyRunningError.new(connector_id)
|
103
|
+
end
|
104
|
+
update_connector_fields(
|
105
|
+
connector_id,
|
106
|
+
{ :sync_now => false,
|
107
|
+
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
108
|
+
:last_synced => Time.now },
|
109
|
+
seq_no,
|
110
|
+
primary_term
|
111
|
+
)
|
79
112
|
|
80
113
|
body = {
|
81
114
|
:connector_id => connector_id,
|
82
115
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
83
116
|
:worker_hostname => Socket.gethostname,
|
84
|
-
:created_at => Time.now
|
117
|
+
:created_at => Time.now,
|
118
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
85
119
|
}
|
86
|
-
job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
87
120
|
|
88
|
-
|
121
|
+
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
122
|
+
end
|
123
|
+
|
124
|
+
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
|
+
return [] unless connector_filtering
|
126
|
+
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
|
+
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
128
|
+
job_filtering << {
|
129
|
+
'domain' => filtering_domain['domain'],
|
130
|
+
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
+
'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
|
132
|
+
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
|
+
}
|
134
|
+
end
|
89
135
|
end
|
90
136
|
|
91
137
|
def update_connector_status(connector_id, status, error_message = nil)
|
@@ -100,7 +146,7 @@ module Core
|
|
100
146
|
end
|
101
147
|
|
102
148
|
def complete_sync(connector_id, job_id, status)
|
103
|
-
sync_status = status[:error] ? Connectors::SyncStatus::
|
149
|
+
sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
104
150
|
|
105
151
|
update_connector_fields(connector_id,
|
106
152
|
:last_sync_status => sync_status,
|
@@ -136,7 +182,7 @@ module Core
|
|
136
182
|
}
|
137
183
|
loop do
|
138
184
|
response = client.search(:body => body)
|
139
|
-
hits = response
|
185
|
+
hits = response.dig('hits', 'hits') || []
|
140
186
|
|
141
187
|
ids = hits.map { |h| h['_id'] }
|
142
188
|
result += ids
|
@@ -242,15 +288,29 @@ module Core
|
|
242
288
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
243
289
|
end
|
244
290
|
|
245
|
-
def update_connector_fields(connector_id, doc = {})
|
291
|
+
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
246
292
|
return if doc.empty?
|
247
|
-
|
293
|
+
update_args = {
|
248
294
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
249
295
|
:id => connector_id,
|
250
296
|
:body => { :doc => doc },
|
251
297
|
:refresh => true,
|
252
298
|
:retry_on_conflict => 3
|
253
|
-
|
299
|
+
}
|
300
|
+
# seq_no and primary_term are used for optimistic concurrency control
|
301
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
302
|
+
if seq_no && primary_term
|
303
|
+
update_args[:if_seq_no] = seq_no
|
304
|
+
update_args[:if_primary_term] = primary_term
|
305
|
+
update_args.delete(:retry_on_conflict)
|
306
|
+
end
|
307
|
+
begin
|
308
|
+
client.update(update_args)
|
309
|
+
rescue Elastic::Transport::Transport::Errors::Conflict
|
310
|
+
# VersionConflictException
|
311
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
312
|
+
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
313
|
+
end
|
254
314
|
end
|
255
315
|
|
256
316
|
private
|
data/lib/core/scheduler.rb
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
require 'time'
|
10
10
|
require 'fugit'
|
11
11
|
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility/cron'
|
13
14
|
require 'utility/logger'
|
14
15
|
require 'utility/exception_tracking'
|
@@ -41,6 +42,8 @@ module Core
|
|
41
42
|
if @is_shutting_down
|
42
43
|
break
|
43
44
|
end
|
45
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
46
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
44
47
|
rescue StandardError => e
|
45
48
|
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
46
49
|
ensure
|
data/lib/utility/common.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
class Common
|
11
|
+
class << self
|
12
|
+
def return_if_present(*args)
|
13
|
+
args.each do |arg|
|
14
|
+
return arg unless arg.nil?
|
15
|
+
end
|
16
|
+
nil
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/utility/errors.rb
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#
|
6
6
|
|
7
7
|
require 'active_support/core_ext/string'
|
8
|
+
require 'elasticsearch'
|
8
9
|
|
9
10
|
module Utility
|
10
11
|
class DocumentError
|
@@ -31,6 +32,8 @@ module Utility
|
|
31
32
|
end
|
32
33
|
|
33
34
|
class ClientError < StandardError; end
|
35
|
+
|
36
|
+
class InvalidFilterConfigError < StandardError; end
|
34
37
|
class EvictionWithNoProgressError < StandardError; end
|
35
38
|
class EvictionError < StandardError
|
36
39
|
attr_accessor :cursors
|
@@ -89,6 +92,7 @@ module Utility
|
|
89
92
|
class InvalidTokenError < StandardError; end
|
90
93
|
class TokenRefreshFailedError < StandardError; end
|
91
94
|
class ConnectorNotAvailableError < StandardError; end
|
95
|
+
class AuthorizationError < StandardError; end
|
92
96
|
|
93
97
|
# For when we want to explicitly set a #cause but can't
|
94
98
|
class ExplicitlyCausedError < StandardError
|
@@ -124,6 +128,7 @@ module Utility
|
|
124
128
|
end
|
125
129
|
end
|
126
130
|
|
131
|
+
AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
|
127
132
|
INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
|
128
133
|
INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
|
129
134
|
UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
|
data/lib/utility/es_client.rb
CHANGED
@@ -20,8 +20,8 @@ module Utility
|
|
20
20
|
attr_reader :cause
|
21
21
|
end
|
22
22
|
|
23
|
-
def initialize(es_config)
|
24
|
-
super(connection_configs(es_config))
|
23
|
+
def initialize(es_config, &block)
|
24
|
+
super(connection_configs(es_config), &block)
|
25
25
|
end
|
26
26
|
|
27
27
|
def connection_configs(es_config)
|
@@ -39,6 +39,10 @@ module Utility
|
|
39
39
|
configs[:log] = es_config[:log] || false
|
40
40
|
configs[:trace] = es_config[:trace] || false
|
41
41
|
|
42
|
+
# transport options
|
43
|
+
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
|
+
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
|
+
|
42
46
|
# if log or trace is activated, we use the application logger
|
43
47
|
configs[:logger] = if configs[:log] || configs[:trace]
|
44
48
|
Utility::Logger.logger
|
data/lib/utility.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.
|
4
|
+
version: 8.6.0.4.pre.20221104T202636Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- lib/core/elastic_connector_actions.rb
|
111
111
|
- lib/core/scheduler.rb
|
112
112
|
- lib/utility.rb
|
113
|
+
- lib/utility/common.rb
|
113
114
|
- lib/utility/constants.rb
|
114
115
|
- lib/utility/cron.rb
|
115
116
|
- lib/utility/elasticsearch/index/language_data.yml
|
@@ -125,9 +126,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
125
126
|
licenses:
|
126
127
|
- Elastic-2.0
|
127
128
|
metadata:
|
128
|
-
revision:
|
129
|
-
repository:
|
130
|
-
post_install_message:
|
129
|
+
revision: 5e2710b9b4af2fed4c7b95502327260a3ebf9e25
|
130
|
+
repository: git@github.com:elastic/ent-search-connectors.git
|
131
|
+
post_install_message:
|
131
132
|
rdoc_options: []
|
132
133
|
require_paths:
|
133
134
|
- lib
|
@@ -138,12 +139,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
138
139
|
version: '0'
|
139
140
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
141
|
requirements:
|
141
|
-
- - "
|
142
|
+
- - ">"
|
142
143
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
144
|
+
version: 1.3.1
|
144
145
|
requirements: []
|
145
146
|
rubygems_version: 3.0.3.1
|
146
|
-
signing_key:
|
147
|
+
signing_key:
|
147
148
|
specification_version: 4
|
148
149
|
summary: Gem containing shared Connector Services libraries
|
149
150
|
test_files: []
|