connectors_utility 8.6.0.4.pre.20221115T001812Z → 8.6.0.4.pre.20221116T024609Z

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ac5e3edace211b9ddceef02ec8785a9967ddbce0c346f629f40bce93bb1e4951
4
- data.tar.gz: e766607f3784e051d52beedb5acec6a8e1fdbfc8334245980c21ac2e06777c10
3
+ metadata.gz: 6ae5315b00ad59db8c9a0300ec34f560a8293a285aa0d62fb5b9f662996b432d
4
+ data.tar.gz: 6eec049ff3a257bcff893edefd841e9b441dcc1858f7f88c4b0376fe47961bcc
5
5
  SHA512:
6
- metadata.gz: f8cb0768b2bd020d16b54bde70b396f88316fa68092663d0cf05045140b932e94e4aeb3646700527bc07517fff04e7296efccf77d22df8aad6ddbb7b7eaaa7b1
7
- data.tar.gz: ae307da6fdec1df3733d910ee7df68aad9fb52f15710539dabde845a405359534c63793649da5445c0191b2c51252ca3616e1f515177038dc317302e95cfcd64
6
+ metadata.gz: 296e79a4f866b91a98d02cf45ab7742efc2e10dd209be9456fcf6be34600247d55b09e7313030a386ccef693207b20b8802d5cb2ded3662cc289ed70fe5c1c3f
7
+ data.tar.gz: 7d59558a77f22e14a01ef279ae1da29195b1f1216920e6aecde81c2c5b5f11371a5f5fc23b87df5587ca3c1969389d86e5b02c189e24f94ea5a0590047af4b98
@@ -26,11 +26,16 @@ module Connectors
26
26
  ERROR
27
27
  ]
28
28
 
29
- PENDING_STATUES = [
29
+ PENDING_STATUSES = [
30
30
  PENDING,
31
31
  SUSPENDED
32
32
  ]
33
33
 
34
+ ACTIVE_STATUSES = [
35
+ IN_PROGRESS,
36
+ CANCELING
37
+ ]
38
+
34
39
  TERMINAL_STATUSES = [
35
40
  CANCELED,
36
41
  COMPLETED,
@@ -23,14 +23,11 @@ module Core
23
23
 
24
24
  DEFAULT_PAGE_SIZE = 100
25
25
 
26
- # Error Classes
27
- class ConnectorNotFoundError < StandardError; end
28
-
29
26
  def self.fetch_by_id(connector_id)
30
27
  es_response = ElasticConnectorActions.get_connector(connector_id)
31
- connectors_meta = ElasticConnectorActions.connectors_meta
28
+ return nil unless es_response[:found]
32
29
 
33
- raise ConnectorNotFoundError.new("Connector with id=#{connector_id} was not found.") unless es_response[:found]
30
+ connectors_meta = ElasticConnectorActions.connectors_meta
34
31
  new(es_response, connectors_meta)
35
32
  end
36
33
 
@@ -122,6 +119,32 @@ module Core
122
119
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
123
120
  end
124
121
 
122
+ def ready_for_sync?
123
+ Connectors::REGISTRY.registered?(service_type) &&
124
+ valid_index_name? &&
125
+ connector_status_allows_sync?
126
+ end
127
+
128
+ def running?
129
+ @elasticsearch_response[:_source][:last_sync_status] == Connectors::SyncStatus::IN_PROGRESS
130
+ end
131
+
132
+ def update_last_sync!(job)
133
+ doc = {
134
+ :last_sync_status => job.status,
135
+ :last_synced => Time.now,
136
+ :last_sync_error => job.error,
137
+ :error => job.error
138
+ }
139
+
140
+ if job.terminated?
141
+ doc[:last_indexed_document_count] = job[:indexed_document_count]
142
+ doc[:last_deleted_document_count] = job[:deleted_document_count]
143
+ end
144
+
145
+ Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
146
+ end
147
+
125
148
  private
126
149
 
127
150
  def initialize(es_response, connectors_meta)
@@ -132,11 +132,35 @@ module Core
132
132
  update_connector_fields(connector_id, { :filtering => filtering })
133
133
  end
134
134
 
135
- def claim_job(connector_id)
135
+ def update_connector_sync_now(connector_id, sync_now)
136
+ doc = connector_with_concurrency_control(connector_id)
137
+
138
+ body = { sync_now: sync_now, last_synced: Time.now }
139
+
140
+ update_connector_fields(
141
+ connector_id,
142
+ body,
143
+ doc[:seq_no],
144
+ doc[:primary_term]
145
+ )
146
+ end
147
+
148
+ def update_connector_last_sync_status(connector_id, last_sync_status)
149
+ doc = connector_with_concurrency_control(connector_id)
150
+
151
+ update_connector_fields(
152
+ connector_id,
153
+ { last_sync_status: last_sync_status },
154
+ doc[:seq_no],
155
+ doc[:primary_term]
156
+ )
157
+ end
158
+
159
+ def connector_with_concurrency_control(connector_id)
136
160
  seq_no = nil
137
161
  primary_term = nil
138
- sync_in_progress = false
139
- connector_record = client.get(
162
+
163
+ doc = client.get(
140
164
  :index => Utility::Constants::CONNECTORS_INDEX,
141
165
  :id => connector_id,
142
166
  :ignore => 404,
@@ -144,42 +168,31 @@ module Core
144
168
  ).tap do |response|
145
169
  seq_no = response['_seq_no']
146
170
  primary_term = response['_primary_term']
147
- sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
148
- end
149
- if sync_in_progress
150
- raise JobAlreadyRunningError.new(connector_id)
151
171
  end
152
- update_connector_fields(
153
- connector_id,
154
- { :sync_now => false,
155
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
156
- :last_synced => Time.now },
157
- seq_no,
158
- primary_term
159
- )
160
172
 
173
+ { doc: doc, seq_no: seq_no, primary_term: primary_term }
174
+ end
175
+
176
+ def create_job(connector_settings:)
161
177
  body = {
162
- :status => Connectors::SyncStatus::IN_PROGRESS,
163
- :worker_hostname => Socket.gethostname,
164
- :created_at => Time.now,
165
- :started_at => Time.now,
166
- :last_seen => Time.now,
167
- :connector => {
168
- :id => connector_id,
169
- :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
178
+ status: Connectors::SyncStatus::PENDING,
179
+ created_at: Time.now,
180
+ last_seen: Time.now,
181
+ connector: {
182
+ id: connector_settings.id,
183
+ filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
+ index_name: connector_settings.index_name,
185
+ language: connector_settings[:language],
186
+ pipeline: connector_settings[:pipeline],
187
+ service_type: connector_settings.service_type
170
188
  }
171
189
  }
172
190
 
173
- index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
- if index_response['result'] == 'created'
175
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
- return client.get(
177
- :index => Utility::Constants::JOB_INDEX,
178
- :id => index_response['_id'],
179
- :ignore => 404
180
- ).with_indifferent_access
181
- end
182
- raise JobNotCreatedError.new(connector_id, index_response)
191
+ index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
192
+
193
+ return index_response if index_response['result'] == 'created'
194
+
195
+ raise JobNotCreatedError.new(connector_settings.id, index_response)
183
196
  end
184
197
 
185
198
  def convert_connector_filtering_to_job_filtering(connector_filtering)
@@ -507,31 +520,15 @@ module Core
507
520
  end
508
521
 
509
522
  def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
510
- return if doc.empty?
511
- update_args = {
512
- :index => Utility::Constants::CONNECTORS_INDEX,
513
- :id => connector_id,
514
- :body => { :doc => doc },
515
- :refresh => true,
516
- :retry_on_conflict => 3
517
- }
518
- # seq_no and primary_term are used for optimistic concurrency control
519
- # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
520
- if seq_no && primary_term
521
- update_args[:if_seq_no] = seq_no
522
- update_args[:if_primary_term] = primary_term
523
- update_args.delete(:retry_on_conflict)
524
- end
525
- begin
526
- client.update(update_args)
527
- rescue Elastic::Transport::Transport::Errors::Conflict
528
- # VersionConflictException
529
- # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
530
- raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
531
- end
523
+ update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
524
+ end
525
+
526
+ def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
527
+ update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
532
528
  end
533
529
 
534
530
  def document_count(index_name)
531
+ client.indices.refresh(:index => index_name)
535
532
  client.count(:index => index_name)['count']
536
533
  end
537
534
 
@@ -563,6 +560,31 @@ module Core
563
560
  filter.deep_merge!(new_validation_state)
564
561
  end
565
562
  end
563
+
564
+ def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
565
+ return if doc.empty?
566
+ update_args = {
567
+ :index => index,
568
+ :id => id,
569
+ :body => { :doc => doc },
570
+ :refresh => true,
571
+ :retry_on_conflict => 3
572
+ }
573
+
574
+ if seq_no && primary_term
575
+ update_args[:if_seq_no] = seq_no
576
+ update_args[:if_primary_term] = primary_term
577
+ update_args.delete(:retry_on_conflict)
578
+ end
579
+
580
+ begin
581
+ client.update(update_args)
582
+ rescue Elastic::Transport::Transport::Errors::Conflict
583
+ # VersionConflictException
584
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
585
+ raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
586
+ end
587
+ end
566
588
  end
567
589
  end
568
590
  end
@@ -0,0 +1,108 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'utility/errors'
11
+ require 'utility/exception_tracking'
12
+
13
+ module Utility
14
+ class ErrorMonitor
15
+ class MonitoringError < StandardError
16
+ attr_accessor :tripped_by
17
+
18
+ def initialize(message = nil, tripped_by: nil)
19
+ super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
20
+ @tripped_by = tripped_by
21
+ end
22
+ end
23
+
24
+ class MaxSuccessiveErrorsExceededError < MonitoringError; end
25
+ class MaxErrorsExceededError < MonitoringError; end
26
+ class MaxErrorsInWindowExceededError < MonitoringError; end
27
+
28
+ attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
29
+
30
+ def initialize(
31
+ max_errors: 1000,
32
+ max_consecutive_errors: 10,
33
+ max_error_ratio: 0.15,
34
+ window_size: 100,
35
+ error_queue_size: 20
36
+ )
37
+ @max_errors = max_errors
38
+ @max_consecutive_errors = max_consecutive_errors
39
+ @max_error_ratio = max_error_ratio
40
+ @window_size = window_size
41
+ @total_error_count = 0
42
+ @success_count = 0
43
+ @consecutive_error_count = 0
44
+ @window_errors = Array.new(window_size) { false }
45
+ @window_index = 0
46
+ @last_error = nil
47
+ @error_queue_size = error_queue_size
48
+ @error_queue = []
49
+ end
50
+
51
+ def note_success
52
+ @consecutive_error_count = 0
53
+ @success_count += 1
54
+ increment_window_index
55
+ end
56
+
57
+ def note_error(error, id: Time.now.to_i)
58
+ stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
59
+ error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
60
+ Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
61
+ @total_error_count += 1
62
+ @consecutive_error_count += 1
63
+ @window_errors[@window_index] = true
64
+ @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
65
+ @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
66
+ increment_window_index
67
+ @last_error = error
68
+
69
+ raise_if_necessary
70
+ end
71
+
72
+ def finalize
73
+ total_documents = @total_error_count + @success_count
74
+ if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
75
+ raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
76
+ end
77
+ end
78
+
79
+ private
80
+
81
+ def raise_if_necessary
82
+ error =
83
+ if @consecutive_error_count > @max_consecutive_errors
84
+ MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
85
+ elsif @total_error_count > @max_errors
86
+ MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
87
+ elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
88
+ MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
89
+ end
90
+
91
+ raise_with_last_cause(error) if error
92
+ end
93
+
94
+ def num_errors_in_window
95
+ @window_errors.count(&:itself).to_f
96
+ end
97
+
98
+ def increment_window_index
99
+ @window_index = (@window_index + 1) % @window_size
100
+ end
101
+
102
+ def raise_with_last_cause(error)
103
+ raise @last_error
104
+ rescue StandardError
105
+ raise error
106
+ end
107
+ end
108
+ end
@@ -60,18 +60,6 @@ module Utility
60
60
  class JobDocumentLimitError < StandardError; end
61
61
  class JobClaimingError < StandardError; end
62
62
 
63
- class MonitoringError < StandardError
64
- attr_accessor :tripped_by
65
-
66
- def initialize(message = nil, tripped_by: nil)
67
- super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
68
- @tripped_by = tripped_by
69
- end
70
- end
71
- class MaxSuccessiveErrorsExceededError < MonitoringError; end
72
- class MaxErrorsExceededError < MonitoringError; end
73
- class MaxErrorsInWindowExceededError < MonitoringError; end
74
-
75
63
  class JobSyncNotPossibleYetError < StandardError
76
64
  attr_accessor :sync_will_be_possible_at
77
65
 
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Utility
10
+ class Filtering
11
+ class << self
12
+ def extract_filter(filtering)
13
+ return {} unless filtering.present?
14
+
15
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
16
+ filter = filtering.is_a?(Array) ? filtering.first : filtering
17
+
18
+ filter.present? ? filter : {}
19
+ end
20
+ end
21
+ end
22
+ end
data/lib/utility.rb CHANGED
@@ -4,6 +4,8 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
7
9
  require 'utility/bulk_queue'
8
10
  require 'utility/common'
9
11
  require 'utility/constants'
@@ -11,8 +13,12 @@ require 'utility/cron'
11
13
  require 'utility/elasticsearch/index/mappings'
12
14
  require 'utility/elasticsearch/index/text_analysis_settings'
13
15
  require 'utility/environment'
16
+ require 'utility/error_monitor'
14
17
  require 'utility/errors'
18
+ require 'utility/filtering'
15
19
  require 'utility/es_client'
16
20
  require 'utility/exception_tracking'
17
21
  require 'utility/extension_mapping_util'
18
22
  require 'utility/logger'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221115T001812Z
4
+ version: 8.6.0.4.pre.20221116T024609Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-15 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -119,16 +119,18 @@ files:
119
119
  - lib/utility/elasticsearch/index/mappings.rb
120
120
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
121
121
  - lib/utility/environment.rb
122
+ - lib/utility/error_monitor.rb
122
123
  - lib/utility/errors.rb
123
124
  - lib/utility/es_client.rb
124
125
  - lib/utility/exception_tracking.rb
125
126
  - lib/utility/extension_mapping_util.rb
127
+ - lib/utility/filtering.rb
126
128
  - lib/utility/logger.rb
127
129
  homepage: https://github.com/elastic/connectors-ruby
128
130
  licenses:
129
131
  - Elastic-2.0
130
132
  metadata:
131
- revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
133
+ revision: b3cc1332879a38930a272a63f8c6be1847578204
132
134
  repository: git@github.com:elastic/ent-search-connectors.git
133
135
  post_install_message:
134
136
  rdoc_options: []