connectors_utility 8.6.0.3 → 8.6.0.4.pre.20221104T202636Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb8d3022cb5894d77ba44a62be03bea6d9a944a49c97a9dcd87aa92f37ae418c
4
- data.tar.gz: 822efb10e7fed04267461bb1515d08ad6e9831c4c79add93391dd457e3961eeb
3
+ metadata.gz: 4532477e27d803b5c9933b26b714979aa2d6f99443e1e0c40a84abc46709f8d9
4
+ data.tar.gz: 7c568080ab4dcdc479f214bfda177cabf12475dbf33ca329d2d5fdf721de42c0
5
5
  SHA512:
6
- metadata.gz: e90c74ffd7efd454e855437ec51ef4c00620665a7dd46a131d37631a68f69c170bb149177619f10c825a6ea447483294e02be9c613d85c689822dfa7a78e42a2
7
- data.tar.gz: 7169e3d728d9eadd5441f38029137e6fd5706b6f5d06a1241419d91d89b130f5289ac8eccdd3ecd6b8b14706b89a8783f4d1029186788500a99c4822914d3301
6
+ metadata.gz: 31865ecfc46198f5a6b8649e22f369573b766799eb177e28f25ae7a269384c63165ce7ffd2b9f41c4644d7d4e238c063237881e10e9e699ad02a220a94c41ab6
7
+ data.tar.gz: 246e0f453c9cffd5566d09d8370eecce5d4c65739ea84832350f2b9e21d2192aa560ad9b12495f9ad3027bc0b44d0f28315775292240ee3431f4c32f65d05b69
@@ -8,11 +8,11 @@
8
8
 
9
9
  module Connectors
10
10
  class ConnectorStatus
11
- CREATED = 'created'
11
+ CREATED = 'created'
12
12
  NEEDS_CONFIGURATION = 'needs_configuration'
13
- CONFIGURED = 'configured'
14
- CONNECTED = 'connected'
15
- ERROR = 'error'
13
+ CONFIGURED = 'configured'
14
+ CONNECTED = 'connected'
15
+ ERROR = 'error'
16
16
 
17
17
  STATUSES = [
18
18
  CREATED,
@@ -8,14 +8,33 @@
8
8
 
9
9
  module Connectors
10
10
  class SyncStatus
11
- COMPLETED = 'completed'
11
+ PENDING = 'pending'
12
12
  IN_PROGRESS = 'in_progress'
13
- FAILED = 'failed'
13
+ CANCELING = 'canceling'
14
+ CANCELED = 'canceled'
15
+ SUSPENDED = 'suspended'
16
+ COMPLETED = 'completed'
17
+ ERROR = 'error'
14
18
 
15
19
  STATUSES = [
16
- COMPLETED,
20
+ PENDING,
17
21
  IN_PROGRESS,
18
- FAILED
22
+ CANCELING,
23
+ CANCELED,
24
+ SUSPENDED,
25
+ COMPLETED,
26
+ ERROR
27
+ ]
28
+
29
+ PENDING_STATUES = [
30
+ PENDING,
31
+ SUSPENDED
32
+ ]
33
+
34
+ TERMINAL_STATUSES = [
35
+ CANCELED,
36
+ COMPLETED,
37
+ ERROR
19
38
  ]
20
39
  end
21
40
  end
@@ -19,6 +19,8 @@ module Core
19
19
  DEFAULT_REDUCE_WHITESPACE = true
20
20
  DEFAULT_RUN_ML_INFERENCE = true
21
21
 
22
+ DEFAULT_FILTERING = {}
23
+
22
24
  DEFAULT_PAGE_SIZE = 100
23
25
 
24
26
  # Error Classes
@@ -80,20 +82,24 @@ module Core
80
82
  self[:scheduling]
81
83
  end
82
84
 
85
+ def filtering
86
+ Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
87
+ end
88
+
83
89
  def request_pipeline
84
- return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
90
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
85
91
  end
86
92
 
87
93
  def extract_binary_content?
88
- return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
94
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
89
95
  end
90
96
 
91
97
  def reduce_whitespace?
92
- return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
98
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
93
99
  end
94
100
 
95
101
  def run_ml_inference?
96
- return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
102
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
97
103
  end
98
104
 
99
105
  def formatted
@@ -110,8 +116,6 @@ module Core
110
116
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
111
117
  end
112
118
 
113
- private
114
-
115
119
  def self.fetch_connectors_by_query(query, page_size)
116
120
  connectors_meta = ElasticConnectorActions.connectors_meta
117
121
 
@@ -120,8 +124,8 @@ module Core
120
124
  loop do
121
125
  response = ElasticConnectorActions.search_connectors(query, page_size, offset)
122
126
 
123
- hits = response['hits']['hits']
124
- total = response['hits']['total']['value']
127
+ hits = response.dig('hits', 'hits') || []
128
+ total = response.dig('hits', 'total', 'value') || 0
125
129
  results += hits.map do |hit|
126
130
  Core::ConnectorSettings.new(hit, connectors_meta)
127
131
  end
@@ -132,11 +136,5 @@ module Core
132
136
  results
133
137
  end
134
138
 
135
- def return_if_present(*args)
136
- args.each do |arg|
137
- return arg unless arg.nil?
138
- end
139
- nil
140
- end
141
139
  end
142
140
  end
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
11
  require 'connectors/sync_status'
12
12
  require 'utility'
13
+ require 'elastic-transport'
13
14
 
14
15
  module Core
16
+ class JobAlreadyRunningError < StandardError
17
+ def initialize(connector_id)
18
+ super("Sync job for connector '#{connector_id}' is already running.")
19
+ end
20
+ end
21
+
22
+ class ConnectorVersionChangedError < StandardError
23
+ def initialize(connector_id, seq_no, primary_term)
24
+ super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
25
+ end
26
+ end
27
+
15
28
  class ElasticConnectorActions
16
29
  class << self
17
30
 
@@ -72,20 +85,53 @@ module Core
72
85
  end
73
86
 
74
87
  def claim_job(connector_id)
75
- update_connector_fields(connector_id,
76
- :sync_now => false,
77
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
78
- :last_synced => Time.now)
88
+ seq_no = nil
89
+ primary_term = nil
90
+ sync_in_progress = false
91
+ connector_record = client.get(
92
+ :index => Utility::Constants::CONNECTORS_INDEX,
93
+ :id => connector_id,
94
+ :ignore => 404,
95
+ :refresh => true
96
+ ).tap do |response|
97
+ seq_no = response['_seq_no']
98
+ primary_term = response['_primary_term']
99
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
100
+ end
101
+ if sync_in_progress
102
+ raise JobAlreadyRunningError.new(connector_id)
103
+ end
104
+ update_connector_fields(
105
+ connector_id,
106
+ { :sync_now => false,
107
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
108
+ :last_synced => Time.now },
109
+ seq_no,
110
+ primary_term
111
+ )
79
112
 
80
113
  body = {
81
114
  :connector_id => connector_id,
82
115
  :status => Connectors::SyncStatus::IN_PROGRESS,
83
116
  :worker_hostname => Socket.gethostname,
84
- :created_at => Time.now
117
+ :created_at => Time.now,
118
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
85
119
  }
86
- job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
87
120
 
88
- job['_id']
121
+ client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
122
+ end
123
+
124
+ def convert_connector_filtering_to_job_filtering(connector_filtering)
125
+ return [] unless connector_filtering
126
+ connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
+ connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
128
+ job_filtering << {
129
+ 'domain' => filtering_domain['domain'],
130
+ 'rules' => filtering_domain.dig('active', 'rules'),
131
+ 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
132
+ 'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
+ }
134
+ end
89
135
  end
90
136
 
91
137
  def update_connector_status(connector_id, status, error_message = nil)
@@ -100,7 +146,7 @@ module Core
100
146
  end
101
147
 
102
148
  def complete_sync(connector_id, job_id, status)
103
- sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
149
+ sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
104
150
 
105
151
  update_connector_fields(connector_id,
106
152
  :last_sync_status => sync_status,
@@ -136,7 +182,7 @@ module Core
136
182
  }
137
183
  loop do
138
184
  response = client.search(:body => body)
139
- hits = response['hits']['hits']
185
+ hits = response.dig('hits', 'hits') || []
140
186
 
141
187
  ids = hits.map { |h| h['_id'] }
142
188
  result += ids
@@ -242,15 +288,29 @@ module Core
242
288
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
243
289
  end
244
290
 
245
- def update_connector_fields(connector_id, doc = {})
291
+ def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
246
292
  return if doc.empty?
247
- client.update(
293
+ update_args = {
248
294
  :index => Utility::Constants::CONNECTORS_INDEX,
249
295
  :id => connector_id,
250
296
  :body => { :doc => doc },
251
297
  :refresh => true,
252
298
  :retry_on_conflict => 3
253
- )
299
+ }
300
+ # seq_no and primary_term are used for optimistic concurrency control
301
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
302
+ if seq_no && primary_term
303
+ update_args[:if_seq_no] = seq_no
304
+ update_args[:if_primary_term] = primary_term
305
+ update_args.delete(:retry_on_conflict)
306
+ end
307
+ begin
308
+ client.update(update_args)
309
+ rescue Elastic::Transport::Transport::Errors::Conflict
310
+ # VersionConflictException
311
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
312
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
313
+ end
254
314
  end
255
315
 
256
316
  private
@@ -9,6 +9,7 @@
9
9
  require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
12
13
  require 'utility/cron'
13
14
  require 'utility/logger'
14
15
  require 'utility/exception_tracking'
@@ -41,6 +42,8 @@ module Core
41
42
  if @is_shutting_down
42
43
  break
43
44
  end
45
+ rescue *Utility::AUTHORIZATION_ERRORS => e
46
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
44
47
  rescue StandardError => e
45
48
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
46
49
  ensure
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Utility
10
+ class Common
11
+ class << self
12
+ def return_if_present(*args)
13
+ args.each do |arg|
14
+ return arg unless arg.nil?
15
+ end
16
+ nil
17
+ end
18
+ end
19
+ end
20
+ end
@@ -5,6 +5,7 @@
5
5
  #
6
6
 
7
7
  require 'active_support/core_ext/string'
8
+ require 'elasticsearch'
8
9
 
9
10
  module Utility
10
11
  class DocumentError
@@ -31,6 +32,8 @@ module Utility
31
32
  end
32
33
 
33
34
  class ClientError < StandardError; end
35
+
36
+ class InvalidFilterConfigError < StandardError; end
34
37
  class EvictionWithNoProgressError < StandardError; end
35
38
  class EvictionError < StandardError
36
39
  attr_accessor :cursors
@@ -89,6 +92,7 @@ module Utility
89
92
  class InvalidTokenError < StandardError; end
90
93
  class TokenRefreshFailedError < StandardError; end
91
94
  class ConnectorNotAvailableError < StandardError; end
95
+ class AuthorizationError < StandardError; end
92
96
 
93
97
  # For when we want to explicitly set a #cause but can't
94
98
  class ExplicitlyCausedError < StandardError
@@ -124,6 +128,7 @@ module Utility
124
128
  end
125
129
  end
126
130
 
131
+ AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
127
132
  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
128
133
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
129
134
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
@@ -20,8 +20,8 @@ module Utility
20
20
  attr_reader :cause
21
21
  end
22
22
 
23
- def initialize(es_config)
24
- super(connection_configs(es_config))
23
+ def initialize(es_config, &block)
24
+ super(connection_configs(es_config), &block)
25
25
  end
26
26
 
27
27
  def connection_configs(es_config)
@@ -39,6 +39,10 @@ module Utility
39
39
  configs[:log] = es_config[:log] || false
40
40
  configs[:trace] = es_config[:trace] || false
41
41
 
42
+ # transport options
43
+ configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
+ configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
+
42
46
  # if log or trace is activated, we use the application logger
43
47
  configs[:logger] = if configs[:log] || configs[:trace]
44
48
  Utility::Logger.logger
data/lib/utility.rb CHANGED
@@ -6,6 +6,7 @@
6
6
 
7
7
  require 'utility/constants'
8
8
  require 'utility/cron'
9
+ require 'utility/common'
9
10
  require 'utility/errors'
10
11
  require 'utility/es_client'
11
12
  require 'utility/environment'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.3
4
+ version: 8.6.0.4.pre.20221104T202636Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-03 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -110,6 +110,7 @@ files:
110
110
  - lib/core/elastic_connector_actions.rb
111
111
  - lib/core/scheduler.rb
112
112
  - lib/utility.rb
113
+ - lib/utility/common.rb
113
114
  - lib/utility/constants.rb
114
115
  - lib/utility/cron.rb
115
116
  - lib/utility/elasticsearch/index/language_data.yml
@@ -125,9 +126,9 @@ homepage: https://github.com/elastic/connectors-ruby
125
126
  licenses:
126
127
  - Elastic-2.0
127
128
  metadata:
128
- revision: aa2faf8cc993a26980441adffe97d62fdaf5aa5c
129
- repository: https://github.com/elastic/connectors-ruby.git
130
- post_install_message:
129
+ revision: 5e2710b9b4af2fed4c7b95502327260a3ebf9e25
130
+ repository: git@github.com:elastic/ent-search-connectors.git
131
+ post_install_message:
131
132
  rdoc_options: []
132
133
  require_paths:
133
134
  - lib
@@ -138,12 +139,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
138
139
  version: '0'
139
140
  required_rubygems_version: !ruby/object:Gem::Requirement
140
141
  requirements:
141
- - - ">="
142
+ - - ">"
142
143
  - !ruby/object:Gem::Version
143
- version: '0'
144
+ version: 1.3.1
144
145
  requirements: []
145
146
  rubygems_version: 3.0.3.1
146
- signing_key:
147
+ signing_key:
147
148
  specification_version: 4
148
149
  summary: Gem containing shared Connector Services libraries
149
150
  test_files: []