connectors_utility 8.6.0.3 → 8.6.0.4.pre.20221104T201057Z

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb8d3022cb5894d77ba44a62be03bea6d9a944a49c97a9dcd87aa92f37ae418c
4
- data.tar.gz: 822efb10e7fed04267461bb1515d08ad6e9831c4c79add93391dd457e3961eeb
3
+ metadata.gz: 75130f5f1747db0119b4619066fa71a9502d82c86838966c9453541704dc5e04
4
+ data.tar.gz: a1dfebbed9be1639600cce8f35f49800d9a5538e97a4ed57d8674d81796902d1
5
5
  SHA512:
6
- metadata.gz: e90c74ffd7efd454e855437ec51ef4c00620665a7dd46a131d37631a68f69c170bb149177619f10c825a6ea447483294e02be9c613d85c689822dfa7a78e42a2
7
- data.tar.gz: 7169e3d728d9eadd5441f38029137e6fd5706b6f5d06a1241419d91d89b130f5289ac8eccdd3ecd6b8b14706b89a8783f4d1029186788500a99c4822914d3301
6
+ metadata.gz: 6faaf8c6151c35c0923304cd11692a03bca2cf4f8f0544d38a76e30eb90ec3090e5f8b5a8249d1bd4f4724c0a4712b1ffc58a1d8be1d46fa18e0a1d1f5e2e045
7
+ data.tar.gz: 57de1647be647f92b9934445034203ed762a453f62e5ce1b4554dff2c1632370eb86bd3b7ff44b2a6380d4ab75f691e8c0c70a39e5caecdfee27f876c475a73f
@@ -8,11 +8,11 @@
8
8
 
9
9
  module Connectors
10
10
  class ConnectorStatus
11
- CREATED = 'created'
11
+ CREATED = 'created'
12
12
  NEEDS_CONFIGURATION = 'needs_configuration'
13
- CONFIGURED = 'configured'
14
- CONNECTED = 'connected'
15
- ERROR = 'error'
13
+ CONFIGURED = 'configured'
14
+ CONNECTED = 'connected'
15
+ ERROR = 'error'
16
16
 
17
17
  STATUSES = [
18
18
  CREATED,
@@ -8,14 +8,33 @@
8
8
 
9
9
  module Connectors
10
10
  class SyncStatus
11
- COMPLETED = 'completed'
11
+ PENDING = 'pending'
12
12
  IN_PROGRESS = 'in_progress'
13
- FAILED = 'failed'
13
+ CANCELING = 'canceling'
14
+ CANCELED = 'canceled'
15
+ SUSPENDED = 'suspended'
16
+ COMPLETED = 'completed'
17
+ ERROR = 'error'
14
18
 
15
19
  STATUSES = [
16
- COMPLETED,
20
+ PENDING,
17
21
  IN_PROGRESS,
18
- FAILED
22
+ CANCELING,
23
+ CANCELED,
24
+ SUSPENDED,
25
+ COMPLETED,
26
+ ERROR
27
+ ]
28
+
29
+ PENDING_STATUES = [
30
+ PENDING,
31
+ SUSPENDED
32
+ ]
33
+
34
+ TERMINAL_STATUSES = [
35
+ CANCELED,
36
+ COMPLETED,
37
+ ERROR
19
38
  ]
20
39
  end
21
40
  end
@@ -19,6 +19,8 @@ module Core
19
19
  DEFAULT_REDUCE_WHITESPACE = true
20
20
  DEFAULT_RUN_ML_INFERENCE = true
21
21
 
22
+ DEFAULT_FILTERING = {}
23
+
22
24
  DEFAULT_PAGE_SIZE = 100
23
25
 
24
26
  # Error Classes
@@ -80,20 +82,24 @@ module Core
80
82
  self[:scheduling]
81
83
  end
82
84
 
85
+ def filtering
86
+ Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
87
+ end
88
+
83
89
  def request_pipeline
84
- return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
90
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
85
91
  end
86
92
 
87
93
  def extract_binary_content?
88
- return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
94
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
89
95
  end
90
96
 
91
97
  def reduce_whitespace?
92
- return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
98
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
93
99
  end
94
100
 
95
101
  def run_ml_inference?
96
- return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
102
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
97
103
  end
98
104
 
99
105
  def formatted
@@ -110,8 +116,6 @@ module Core
110
116
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
111
117
  end
112
118
 
113
- private
114
-
115
119
  def self.fetch_connectors_by_query(query, page_size)
116
120
  connectors_meta = ElasticConnectorActions.connectors_meta
117
121
 
@@ -120,8 +124,8 @@ module Core
120
124
  loop do
121
125
  response = ElasticConnectorActions.search_connectors(query, page_size, offset)
122
126
 
123
- hits = response['hits']['hits']
124
- total = response['hits']['total']['value']
127
+ hits = response.dig('hits', 'hits') || []
128
+ total = response.dig('hits', 'total', 'value') || 0
125
129
  results += hits.map do |hit|
126
130
  Core::ConnectorSettings.new(hit, connectors_meta)
127
131
  end
@@ -132,11 +136,5 @@ module Core
132
136
  results
133
137
  end
134
138
 
135
- def return_if_present(*args)
136
- args.each do |arg|
137
- return arg unless arg.nil?
138
- end
139
- nil
140
- end
141
139
  end
142
140
  end
@@ -10,8 +10,21 @@ require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
11
  require 'connectors/sync_status'
12
12
  require 'utility'
13
+ require 'elastic-transport'
13
14
 
14
15
  module Core
16
+ class JobAlreadyRunningError < StandardError
17
+ def initialize(connector_id)
18
+ super("Sync job for connector '#{connector_id}' is already running.")
19
+ end
20
+ end
21
+
22
+ class ConnectorVersionChangedError < StandardError
23
+ def initialize(connector_id, seq_no, primary_term)
24
+ super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
25
+ end
26
+ end
27
+
15
28
  class ElasticConnectorActions
16
29
  class << self
17
30
 
@@ -72,20 +85,53 @@ module Core
72
85
  end
73
86
 
74
87
  def claim_job(connector_id)
75
- update_connector_fields(connector_id,
76
- :sync_now => false,
77
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
78
- :last_synced => Time.now)
88
+ seq_no = nil
89
+ primary_term = nil
90
+ sync_in_progress = false
91
+ connector_record = client.get(
92
+ :index => Utility::Constants::CONNECTORS_INDEX,
93
+ :id => connector_id,
94
+ :ignore => 404,
95
+ :refresh => true
96
+ ).tap do |response|
97
+ seq_no = response['_seq_no']
98
+ primary_term = response['_primary_term']
99
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
100
+ end
101
+ if sync_in_progress
102
+ raise JobAlreadyRunningError.new(connector_id)
103
+ end
104
+ update_connector_fields(
105
+ connector_id,
106
+ { :sync_now => false,
107
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
108
+ :last_synced => Time.now },
109
+ seq_no,
110
+ primary_term
111
+ )
79
112
 
80
113
  body = {
81
114
  :connector_id => connector_id,
82
115
  :status => Connectors::SyncStatus::IN_PROGRESS,
83
116
  :worker_hostname => Socket.gethostname,
84
- :created_at => Time.now
117
+ :created_at => Time.now,
118
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
85
119
  }
86
- job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
87
120
 
88
- job['_id']
121
+ client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
122
+ end
123
+
124
+ def convert_connector_filtering_to_job_filtering(connector_filtering)
125
+ return [] unless connector_filtering
126
+ connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
+ connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
128
+ job_filtering << {
129
+ 'domain' => filtering_domain['domain'],
130
+ 'rules' => filtering_domain.dig('active', 'rules'),
131
+ 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
132
+ 'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
+ }
134
+ end
89
135
  end
90
136
 
91
137
  def update_connector_status(connector_id, status, error_message = nil)
@@ -100,7 +146,7 @@ module Core
100
146
  end
101
147
 
102
148
  def complete_sync(connector_id, job_id, status)
103
- sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
149
+ sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
104
150
 
105
151
  update_connector_fields(connector_id,
106
152
  :last_sync_status => sync_status,
@@ -136,7 +182,7 @@ module Core
136
182
  }
137
183
  loop do
138
184
  response = client.search(:body => body)
139
- hits = response['hits']['hits']
185
+ hits = response.dig('hits', 'hits') || []
140
186
 
141
187
  ids = hits.map { |h| h['_id'] }
142
188
  result += ids
@@ -242,15 +288,29 @@ module Core
242
288
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
243
289
  end
244
290
 
245
- def update_connector_fields(connector_id, doc = {})
291
+ def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
246
292
  return if doc.empty?
247
- client.update(
293
+ update_args = {
248
294
  :index => Utility::Constants::CONNECTORS_INDEX,
249
295
  :id => connector_id,
250
296
  :body => { :doc => doc },
251
297
  :refresh => true,
252
298
  :retry_on_conflict => 3
253
- )
299
+ }
300
+ # seq_no and primary_term are used for optimistic concurrency control
301
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
302
+ if seq_no && primary_term
303
+ update_args[:if_seq_no] = seq_no
304
+ update_args[:if_primary_term] = primary_term
305
+ update_args.delete(:retry_on_conflict)
306
+ end
307
+ begin
308
+ client.update(update_args)
309
+ rescue Elastic::Transport::Transport::Errors::Conflict
310
+ # VersionConflictException
311
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
312
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
313
+ end
254
314
  end
255
315
 
256
316
  private
@@ -9,6 +9,7 @@
9
9
  require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
12
13
  require 'utility/cron'
13
14
  require 'utility/logger'
14
15
  require 'utility/exception_tracking'
@@ -41,6 +42,8 @@ module Core
41
42
  if @is_shutting_down
42
43
  break
43
44
  end
45
+ rescue *Utility::AUTHORIZATION_ERRORS => e
46
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
44
47
  rescue StandardError => e
45
48
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
46
49
  ensure
@@ -5,6 +5,7 @@
5
5
  #
6
6
 
7
7
  require 'active_support/core_ext/string'
8
+ require 'elasticsearch'
8
9
 
9
10
  module Utility
10
11
  class DocumentError
@@ -31,6 +32,8 @@ module Utility
31
32
  end
32
33
 
33
34
  class ClientError < StandardError; end
35
+
36
+ class InvalidFilterConfigError < StandardError; end
34
37
  class EvictionWithNoProgressError < StandardError; end
35
38
  class EvictionError < StandardError
36
39
  attr_accessor :cursors
@@ -89,6 +92,7 @@ module Utility
89
92
  class InvalidTokenError < StandardError; end
90
93
  class TokenRefreshFailedError < StandardError; end
91
94
  class ConnectorNotAvailableError < StandardError; end
95
+ class AuthorizationError < StandardError; end
92
96
 
93
97
  # For when we want to explicitly set a #cause but can't
94
98
  class ExplicitlyCausedError < StandardError
@@ -124,6 +128,7 @@ module Utility
124
128
  end
125
129
  end
126
130
 
131
+ AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
127
132
  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
128
133
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
129
134
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
@@ -20,8 +20,8 @@ module Utility
20
20
  attr_reader :cause
21
21
  end
22
22
 
23
- def initialize(es_config)
24
- super(connection_configs(es_config))
23
+ def initialize(es_config, &block)
24
+ super(connection_configs(es_config), &block)
25
25
  end
26
26
 
27
27
  def connection_configs(es_config)
@@ -39,6 +39,10 @@ module Utility
39
39
  configs[:log] = es_config[:log] || false
40
40
  configs[:trace] = es_config[:trace] || false
41
41
 
42
+ # transport options
43
+ configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
+ configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
+
42
46
  # if log or trace is activated, we use the application logger
43
47
  configs[:logger] = if configs[:log] || configs[:trace]
44
48
  Utility::Logger.logger
data/lib/utility.rb CHANGED
@@ -6,6 +6,7 @@
6
6
 
7
7
  require 'utility/constants'
8
8
  require 'utility/cron'
9
+ require 'utility/common'
9
10
  require 'utility/errors'
10
11
  require 'utility/es_client'
11
12
  require 'utility/environment'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.3
4
+ version: 8.6.0.4.pre.20221104T201057Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-03 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -125,9 +125,9 @@ homepage: https://github.com/elastic/connectors-ruby
125
125
  licenses:
126
126
  - Elastic-2.0
127
127
  metadata:
128
- revision: aa2faf8cc993a26980441adffe97d62fdaf5aa5c
129
- repository: https://github.com/elastic/connectors-ruby.git
130
- post_install_message:
128
+ revision: 2051b3907639a1fe2ae68efdc33c06cf12d38383
129
+ repository: git@github.com:elastic/ent-search-connectors.git
130
+ post_install_message:
131
131
  rdoc_options: []
132
132
  require_paths:
133
133
  - lib
@@ -138,12 +138,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
138
138
  version: '0'
139
139
  required_rubygems_version: !ruby/object:Gem::Requirement
140
140
  requirements:
141
- - - ">="
141
+ - - ">"
142
142
  - !ruby/object:Gem::Version
143
- version: '0'
143
+ version: 1.3.1
144
144
  requirements: []
145
145
  rubygems_version: 3.0.3.1
146
- signing_key:
146
+ signing_key:
147
147
  specification_version: 4
148
148
  summary: Gem containing shared Connector Services libraries
149
149
  test_files: []