connectors_utility 8.6.0.2 → 8.6.0.4.pre.20221104T201057Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2ca64052bb1f5b2fc108b3610293b26783d42dc7aa957426bad95415d456527
4
- data.tar.gz: c40091f7500e1ef3c39336e1f44a92e6ba9663ad26849af79283543283c3106b
3
+ metadata.gz: 75130f5f1747db0119b4619066fa71a9502d82c86838966c9453541704dc5e04
4
+ data.tar.gz: a1dfebbed9be1639600cce8f35f49800d9a5538e97a4ed57d8674d81796902d1
5
5
  SHA512:
6
- metadata.gz: ca920820800ac34e4e3338c3d0c06799b6e18dcaefd588ef6830bbf5062c453ac30532622dc416b3949e831aee5dd683db68d1227fc2c3ce25bf17ec0ce7da25
7
- data.tar.gz: 706728e5dc9e7c0c2dbb6dcdeba46f8efa4efe8ee42028adb47256de710702d928095143f261d92e257ae123797e0e6f9c2da918cf54c76979e70acf1fead187
6
+ metadata.gz: 6faaf8c6151c35c0923304cd11692a03bca2cf4f8f0544d38a76e30eb90ec3090e5f8b5a8249d1bd4f4724c0a4712b1ffc58a1d8be1d46fa18e0a1d1f5e2e045
7
+ data.tar.gz: 57de1647be647f92b9934445034203ed762a453f62e5ce1b4554dff2c1632370eb86bd3b7ff44b2a6380d4ab75f691e8c0c70a39e5caecdfee27f876c475a73f
@@ -8,11 +8,11 @@
8
8
 
9
9
  module Connectors
10
10
  class ConnectorStatus
11
- CREATED = 'created'
11
+ CREATED = 'created'
12
12
  NEEDS_CONFIGURATION = 'needs_configuration'
13
- CONFIGURED = 'configured'
14
- CONNECTED = 'connected'
15
- ERROR = 'error'
13
+ CONFIGURED = 'configured'
14
+ CONNECTED = 'connected'
15
+ ERROR = 'error'
16
16
 
17
17
  STATUSES = [
18
18
  CREATED,
@@ -8,14 +8,33 @@
8
8
 
9
9
  module Connectors
10
10
  class SyncStatus
11
- COMPLETED = 'completed'
11
+ PENDING = 'pending'
12
12
  IN_PROGRESS = 'in_progress'
13
- FAILED = 'failed'
13
+ CANCELING = 'canceling'
14
+ CANCELED = 'canceled'
15
+ SUSPENDED = 'suspended'
16
+ COMPLETED = 'completed'
17
+ ERROR = 'error'
14
18
 
15
19
  STATUSES = [
16
- COMPLETED,
20
+ PENDING,
17
21
  IN_PROGRESS,
18
- FAILED
22
+ CANCELING,
23
+ CANCELED,
24
+ SUSPENDED,
25
+ COMPLETED,
26
+ ERROR
27
+ ]
28
+
29
+ PENDING_STATUES = [
30
+ PENDING,
31
+ SUSPENDED
32
+ ]
33
+
34
+ TERMINAL_STATUSES = [
35
+ CANCELED,
36
+ COMPLETED,
37
+ ERROR
19
38
  ]
20
39
  end
21
40
  end
@@ -19,6 +19,8 @@ module Core
19
19
  DEFAULT_REDUCE_WHITESPACE = true
20
20
  DEFAULT_RUN_ML_INFERENCE = true
21
21
 
22
+ DEFAULT_FILTERING = {}
23
+
22
24
  DEFAULT_PAGE_SIZE = 100
23
25
 
24
26
  # Error Classes
@@ -80,20 +82,24 @@ module Core
80
82
  self[:scheduling]
81
83
  end
82
84
 
85
+ def filtering
86
+ Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
87
+ end
88
+
83
89
  def request_pipeline
84
- return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
90
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
85
91
  end
86
92
 
87
93
  def extract_binary_content?
88
- return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
94
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
89
95
  end
90
96
 
91
97
  def reduce_whitespace?
92
- return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
98
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
93
99
  end
94
100
 
95
101
  def run_ml_inference?
96
- return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
102
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
97
103
  end
98
104
 
99
105
  def formatted
@@ -110,8 +116,6 @@ module Core
110
116
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
111
117
  end
112
118
 
113
- private
114
-
115
119
  def self.fetch_connectors_by_query(query, page_size)
116
120
  connectors_meta = ElasticConnectorActions.connectors_meta
117
121
 
@@ -120,8 +124,8 @@ module Core
120
124
  loop do
121
125
  response = ElasticConnectorActions.search_connectors(query, page_size, offset)
122
126
 
123
- hits = response['hits']['hits']
124
- total = response['hits']['total']['value']
127
+ hits = response.dig('hits', 'hits') || []
128
+ total = response.dig('hits', 'total', 'value') || 0
125
129
  results += hits.map do |hit|
126
130
  Core::ConnectorSettings.new(hit, connectors_meta)
127
131
  end
@@ -132,11 +136,5 @@ module Core
132
136
  results
133
137
  end
134
138
 
135
- def return_if_present(*args)
136
- args.each do |arg|
137
- return arg unless arg.nil?
138
- end
139
- nil
140
- end
141
139
  end
142
140
  end
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
11
  require 'connectors/sync_status'
12
12
  require 'utility'
13
+ require 'elastic-transport'
13
14
 
14
15
  module Core
16
+ class JobAlreadyRunningError < StandardError
17
+ def initialize(connector_id)
18
+ super("Sync job for connector '#{connector_id}' is already running.")
19
+ end
20
+ end
21
+
22
+ class ConnectorVersionChangedError < StandardError
23
+ def initialize(connector_id, seq_no, primary_term)
24
+ super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
25
+ end
26
+ end
27
+
15
28
  class ElasticConnectorActions
16
29
  class << self
17
30
 
@@ -72,20 +85,53 @@ module Core
72
85
  end
73
86
 
74
87
  def claim_job(connector_id)
75
- update_connector_fields(connector_id,
76
- :sync_now => false,
77
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
78
- :last_synced => Time.now)
88
+ seq_no = nil
89
+ primary_term = nil
90
+ sync_in_progress = false
91
+ connector_record = client.get(
92
+ :index => Utility::Constants::CONNECTORS_INDEX,
93
+ :id => connector_id,
94
+ :ignore => 404,
95
+ :refresh => true
96
+ ).tap do |response|
97
+ seq_no = response['_seq_no']
98
+ primary_term = response['_primary_term']
99
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
100
+ end
101
+ if sync_in_progress
102
+ raise JobAlreadyRunningError.new(connector_id)
103
+ end
104
+ update_connector_fields(
105
+ connector_id,
106
+ { :sync_now => false,
107
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
108
+ :last_synced => Time.now },
109
+ seq_no,
110
+ primary_term
111
+ )
79
112
 
80
113
  body = {
81
114
  :connector_id => connector_id,
82
115
  :status => Connectors::SyncStatus::IN_PROGRESS,
83
116
  :worker_hostname => Socket.gethostname,
84
- :created_at => Time.now
117
+ :created_at => Time.now,
118
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
85
119
  }
86
- job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
87
120
 
88
- job['_id']
121
+ client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
122
+ end
123
+
124
+ def convert_connector_filtering_to_job_filtering(connector_filtering)
125
+ return [] unless connector_filtering
126
+ connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
+ connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
128
+ job_filtering << {
129
+ 'domain' => filtering_domain['domain'],
130
+ 'rules' => filtering_domain.dig('active', 'rules'),
131
+ 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
132
+ 'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
+ }
134
+ end
89
135
  end
90
136
 
91
137
  def update_connector_status(connector_id, status, error_message = nil)
@@ -100,7 +146,7 @@ module Core
100
146
  end
101
147
 
102
148
  def complete_sync(connector_id, job_id, status)
103
- sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
149
+ sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
104
150
 
105
151
  update_connector_fields(connector_id,
106
152
  :last_sync_status => sync_status,
@@ -136,7 +182,7 @@ module Core
136
182
  }
137
183
  loop do
138
184
  response = client.search(:body => body)
139
- hits = response['hits']['hits']
185
+ hits = response.dig('hits', 'hits') || []
140
186
 
141
187
  ids = hits.map { |h| h['_id'] }
142
188
  result += ids
@@ -242,15 +288,29 @@ module Core
242
288
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
243
289
  end
244
290
 
245
- def update_connector_fields(connector_id, doc = {})
291
+ def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
246
292
  return if doc.empty?
247
- client.update(
293
+ update_args = {
248
294
  :index => Utility::Constants::CONNECTORS_INDEX,
249
295
  :id => connector_id,
250
296
  :body => { :doc => doc },
251
297
  :refresh => true,
252
298
  :retry_on_conflict => 3
253
- )
299
+ }
300
+ # seq_no and primary_term are used for optimistic concurrency control
301
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
302
+ if seq_no && primary_term
303
+ update_args[:if_seq_no] = seq_no
304
+ update_args[:if_primary_term] = primary_term
305
+ update_args.delete(:retry_on_conflict)
306
+ end
307
+ begin
308
+ client.update(update_args)
309
+ rescue Elastic::Transport::Transport::Errors::Conflict
310
+ # VersionConflictException
311
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
312
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
313
+ end
254
314
  end
255
315
 
256
316
  private
@@ -9,6 +9,7 @@
9
9
  require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
12
13
  require 'utility/cron'
13
14
  require 'utility/logger'
14
15
  require 'utility/exception_tracking'
@@ -41,6 +42,8 @@ module Core
41
42
  if @is_shutting_down
42
43
  break
43
44
  end
45
+ rescue *Utility::AUTHORIZATION_ERRORS => e
46
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
44
47
  rescue StandardError => e
45
48
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
46
49
  ensure
@@ -152,7 +155,7 @@ module Core
152
155
  if Connectors::REGISTRY.registered?(service_type)
153
156
  true
154
157
  else
155
- Utility::Logger.info("The service type (#{service_type}) is not supported.")
158
+ Utility::Logger.warn("The service type (#{service_type}) is not supported.")
156
159
  false
157
160
  end
158
161
  end
@@ -5,6 +5,7 @@
5
5
  #
6
6
 
7
7
  require 'active_support/core_ext/string'
8
+ require 'elasticsearch'
8
9
 
9
10
  module Utility
10
11
  class DocumentError
@@ -31,6 +32,8 @@ module Utility
31
32
  end
32
33
 
33
34
  class ClientError < StandardError; end
35
+
36
+ class InvalidFilterConfigError < StandardError; end
34
37
  class EvictionWithNoProgressError < StandardError; end
35
38
  class EvictionError < StandardError
36
39
  attr_accessor :cursors
@@ -89,6 +92,7 @@ module Utility
89
92
  class InvalidTokenError < StandardError; end
90
93
  class TokenRefreshFailedError < StandardError; end
91
94
  class ConnectorNotAvailableError < StandardError; end
95
+ class AuthorizationError < StandardError; end
92
96
 
93
97
  # For when we want to explicitly set a #cause but can't
94
98
  class ExplicitlyCausedError < StandardError
@@ -124,6 +128,7 @@ module Utility
124
128
  end
125
129
  end
126
130
 
131
+ AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
127
132
  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
128
133
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
129
134
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
@@ -20,8 +20,8 @@ module Utility
20
20
  attr_reader :cause
21
21
  end
22
22
 
23
- def initialize(es_config)
24
- super(connection_configs(es_config))
23
+ def initialize(es_config, &block)
24
+ super(connection_configs(es_config), &block)
25
25
  end
26
26
 
27
27
  def connection_configs(es_config)
@@ -39,6 +39,10 @@ module Utility
39
39
  configs[:log] = es_config[:log] || false
40
40
  configs[:trace] = es_config[:trace] || false
41
41
 
42
+ # transport options
43
+ configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
+ configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
+
42
46
  # if log or trace is activated, we use the application logger
43
47
  configs[:logger] = if configs[:log] || configs[:trace]
44
48
  Utility::Logger.logger
data/lib/utility.rb CHANGED
@@ -6,6 +6,7 @@
6
6
 
7
7
  require 'utility/constants'
8
8
  require 'utility/cron'
9
+ require 'utility/common'
9
10
  require 'utility/errors'
10
11
  require 'utility/es_client'
11
12
  require 'utility/environment'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.2
4
+ version: 8.6.0.4.pre.20221104T201057Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-27 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -125,9 +125,9 @@ homepage: https://github.com/elastic/connectors-ruby
125
125
  licenses:
126
126
  - Elastic-2.0
127
127
  metadata:
128
- revision: d6342a5a66d1f3361ce4a2621d2cf64bad9c8a1c
129
- repository: https://github.com/elastic/connectors-ruby.git
130
- post_install_message:
128
+ revision: 2051b3907639a1fe2ae68efdc33c06cf12d38383
129
+ repository: git@github.com:elastic/ent-search-connectors.git
130
+ post_install_message:
131
131
  rdoc_options: []
132
132
  require_paths:
133
133
  - lib
@@ -138,12 +138,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
138
138
  version: '0'
139
139
  required_rubygems_version: !ruby/object:Gem::Requirement
140
140
  requirements:
141
- - - ">="
141
+ - - ">"
142
142
  - !ruby/object:Gem::Version
143
- version: '0'
143
+ version: 1.3.1
144
144
  requirements: []
145
145
  rubygems_version: 3.0.3.1
146
- signing_key:
146
+ signing_key:
147
147
  specification_version: 4
148
148
  summary: Gem containing shared Connector Services libraries
149
149
  test_files: []