connectors_utility 8.6.0.4.pre.20221115T002329Z → 8.7.0.0.pre.20221117T004939Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 996cc59bf15e82bf245bc30716d0ac93861514fe9f8e73f92948f80e60adccc4
4
- data.tar.gz: f904929c8d82a73e1763cf30536c7e703b885d633f05e9cdecbba310eb44d99d
3
+ metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
4
+ data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
5
5
  SHA512:
6
- metadata.gz: d22401f2411e468c734bb015e6027ede2858a64fcdeb6ea446ad9cc4871c68791e736dea230e5a2fcee2c77b84514c41314cd866f7766bb2906e4396b1c557e8
7
- data.tar.gz: e3ed8f7cb8e0d10bfbc220520ff7d98dcc7d221ee65f4790976eb6e1f538f78ac781eaa453258896749ed56e41eb3cf3c065a206cbb209740d43dbe6b37f7b52
6
+ metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
7
+ data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
@@ -26,11 +26,16 @@ module Connectors
26
26
  ERROR
27
27
  ]
28
28
 
29
- PENDING_STATUES = [
29
+ PENDING_STATUSES = [
30
30
  PENDING,
31
31
  SUSPENDED
32
32
  ]
33
33
 
34
# Sync statuses grouped as "active": IN_PROGRESS and CANCELING.
ACTIVE_STATUSES = [IN_PROGRESS, CANCELING]
38
+
34
39
  TERMINAL_STATUSES = [
35
40
  CANCELED,
36
41
  COMPLETED,
@@ -23,14 +23,11 @@ module Core
23
23
 
24
24
  DEFAULT_PAGE_SIZE = 100
25
25
 
26
- # Error Classes
27
- class ConnectorNotFoundError < StandardError; end
28
-
29
26
  def self.fetch_by_id(connector_id)
30
27
  es_response = ElasticConnectorActions.get_connector(connector_id)
31
- connectors_meta = ElasticConnectorActions.connectors_meta
28
+ return nil unless es_response[:found]
32
29
 
33
- raise ConnectorNotFoundError.new("Connector with id=#{connector_id} was not found.") unless es_response[:found]
30
+ connectors_meta = ElasticConnectorActions.connectors_meta
34
31
  new(es_response, connectors_meta)
35
32
  end
36
33
 
@@ -122,6 +119,32 @@ module Core
122
119
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
123
120
  end
124
121
 
122
# Combines the three preconditions checked before a sync: the service type
# is registered in Connectors::REGISTRY, valid_index_name? passes and
# connector_status_allows_sync? passes. Checks run in that order and stop at
# the first falsy result, which is returned as-is.
def ready_for_sync?
  registered = Connectors::REGISTRY.registered?(service_type)
  return registered unless registered

  index_ok = valid_index_name?
  return index_ok unless index_ok

  connector_status_allows_sync?
end
127
+
128
# True when the last_sync_status stored under :_source of the wrapped
# Elasticsearch response equals Connectors::SyncStatus::IN_PROGRESS.
def running?
  last_sync_status = @elasticsearch_response[:_source][:last_sync_status]
  last_sync_status == Connectors::SyncStatus::IN_PROGRESS
end
131
+
132
# Copies the outcome of +job+ onto the connector document identified by
# job.connector_id.
#
# Status, error (written to both :last_sync_error and :error) and the current
# time are always written; the indexed/deleted document counts are only
# written once job.terminated? is true.
#
# @param job an object responding to status, error, terminated?, connector_id and []
def update_last_sync!(job)
  fields = {
    :last_sync_status => job.status,
    :last_synced => Time.now,
    :last_sync_error => job.error,
    :error => job.error
  }

  if job.terminated?
    fields.merge!(
      :last_indexed_document_count => job[:indexed_document_count],
      :last_deleted_document_count => job[:deleted_document_count]
    )
  end

  Core::ElasticConnectorActions.update_connector_fields(job.connector_id, fields)
end
147
+
125
148
  private
126
149
 
127
150
  def initialize(es_response, connectors_meta)
@@ -132,11 +132,35 @@ module Core
132
132
  update_connector_fields(connector_id, { :filtering => filtering })
133
133
  end
134
134
 
135
- def claim_job(connector_id)
135
# Sets the connector's sync_now flag (and stamps last_synced with the current
# time), guarded by the seq_no / primary_term read from the connector
# document immediately beforehand.
def update_connector_sync_now(connector_id, sync_now)
  seq_no, primary_term = connector_with_concurrency_control(connector_id).values_at(:seq_no, :primary_term)
  fields = { sync_now: sync_now, last_synced: Time.now }

  update_connector_fields(connector_id, fields, seq_no, primary_term)
end
147
+
148
# Writes +last_sync_status+ onto the connector document, guarded by the
# seq_no / primary_term read from the connector document immediately beforehand.
def update_connector_last_sync_status(connector_id, last_sync_status)
  seq_no, primary_term = connector_with_concurrency_control(connector_id).values_at(:seq_no, :primary_term)
  fields = { last_sync_status: last_sync_status }

  update_connector_fields(connector_id, fields, seq_no, primary_term)
end
158
+
159
+ def connector_with_concurrency_control(connector_id)
136
160
  seq_no = nil
137
161
  primary_term = nil
138
- sync_in_progress = false
139
- connector_record = client.get(
162
+
163
+ doc = client.get(
140
164
  :index => Utility::Constants::CONNECTORS_INDEX,
141
165
  :id => connector_id,
142
166
  :ignore => 404,
@@ -144,42 +168,31 @@ module Core
144
168
  ).tap do |response|
145
169
  seq_no = response['_seq_no']
146
170
  primary_term = response['_primary_term']
147
- sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
148
- end
149
- if sync_in_progress
150
- raise JobAlreadyRunningError.new(connector_id)
151
171
  end
152
- update_connector_fields(
153
- connector_id,
154
- { :sync_now => false,
155
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
156
- :last_synced => Time.now },
157
- seq_no,
158
- primary_term
159
- )
160
172
 
173
+ { doc: doc, seq_no: seq_no, primary_term: primary_term }
174
+ end
175
+
176
+ def create_job(connector_settings:)
161
177
  body = {
162
- :status => Connectors::SyncStatus::IN_PROGRESS,
163
- :worker_hostname => Socket.gethostname,
164
- :created_at => Time.now,
165
- :started_at => Time.now,
166
- :last_seen => Time.now,
167
- :connector => {
168
- :id => connector_id,
169
- :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
178
+ status: Connectors::SyncStatus::PENDING,
179
+ created_at: Time.now,
180
+ last_seen: Time.now,
181
+ connector: {
182
+ id: connector_settings.id,
183
+ filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
184
+ index_name: connector_settings.index_name,
185
+ language: connector_settings[:language],
186
+ pipeline: connector_settings[:pipeline],
187
+ service_type: connector_settings.service_type
170
188
  }
171
189
  }
172
190
 
173
- index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
- if index_response['result'] == 'created'
175
- # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
- return client.get(
177
- :index => Utility::Constants::JOB_INDEX,
178
- :id => index_response['_id'],
179
- :ignore => 404
180
- ).with_indifferent_access
181
- end
182
- raise JobNotCreatedError.new(connector_id, index_response)
191
+ index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
192
+
193
+ return index_response if index_response['result'] == 'created'
194
+
195
+ raise JobNotCreatedError.new(connector_settings.id, index_response)
183
196
  end
184
197
 
185
198
  def convert_connector_filtering_to_job_filtering(connector_filtering)
@@ -507,31 +520,15 @@ module Core
507
520
  end
508
521
 
509
522
  def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
510
- return if doc.empty?
511
- update_args = {
512
- :index => Utility::Constants::CONNECTORS_INDEX,
513
- :id => connector_id,
514
- :body => { :doc => doc },
515
- :refresh => true,
516
- :retry_on_conflict => 3
517
- }
518
- # seq_no and primary_term are used for optimistic concurrency control
519
- # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
520
- if seq_no && primary_term
521
- update_args[:if_seq_no] = seq_no
522
- update_args[:if_primary_term] = primary_term
523
- update_args.delete(:retry_on_conflict)
524
- end
525
- begin
526
- client.update(update_args)
527
- rescue Elastic::Transport::Transport::Errors::Conflict
528
- # VersionConflictException
529
- # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
530
- raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
531
- end
523
+ update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
524
+ end
525
+
526
# Partial update of a job document: delegates to update_doc_fields against
# Utility::Constants::JOB_INDEX, forwarding the optional seq_no / primary_term.
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
  job_index = Utility::Constants::JOB_INDEX
  update_doc_fields(job_index, job_id, doc, seq_no, primary_term)
end
533
529
 
534
530
  def document_count(index_name)
531
+ client.indices.refresh(:index => index_name)
535
532
  client.count(:index => index_name)['count']
536
533
  end
537
534
 
@@ -563,6 +560,31 @@ module Core
563
560
  filter.deep_merge!(new_validation_state)
564
561
  end
565
562
  end
563
+
564
# Applies a partial update (+doc+) to document +id+ in +index+ and returns the
# client's response; does nothing and returns nil when +doc+ is empty.
#
# When both +seq_no+ and +primary_term+ are given they are sent as
# if_seq_no / if_primary_term (optimistic concurrency control); otherwise the
# request carries retry_on_conflict = 3 instead.
#
# @raise [ConnectorVersionChangedError] when the client reports a version conflict
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
  return if doc.empty?

  request = {
    :index => index,
    :id => id,
    :body => { :doc => doc },
    :refresh => true
  }

  if seq_no && primary_term
    request[:if_seq_no] = seq_no
    request[:if_primary_term] = primary_term
  else
    request[:retry_on_conflict] = 3
  end

  client.update(request)
rescue Elastic::Transport::Transport::Errors::Conflict
  # VersionConflictException
  # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
  raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
end
566
588
  end
567
589
  end
568
590
  end
@@ -6,12 +6,14 @@
6
6
 
7
7
  require 'json'
8
8
 
9
+ require 'utility/constants'
10
+
9
11
  module Utility
10
12
  class BulkQueue
11
13
  class QueueOverflowError < StandardError; end
12
14
 
13
15
  # 500 items or 5MB
14
- def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
16
+ def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
15
17
  @operation_count_threshold = operation_count_threshold.freeze
16
18
  @size_threshold = size_threshold.freeze
17
19
 
@@ -18,5 +18,10 @@ module Utility
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
19
  FILTERING_RULES_FEATURE = 'filtering_rules'
20
20
  FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
21
+
22
# Maximum number of operations in a single Elasticsearch BULK request used to ingest data
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
# Same value under the name Utility::BulkQueue#initialize uses for its default
# operation count threshold; without it that default raises NameError.
DEFAULT_MAX_INGESTION_QUEUE_LENGTH = DEFAULT_MAX_INGESTION_QUEUE_SIZE
# Maximum size in bytes of either a whole Elasticsearch BULK request or one document in it
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
21
26
  end
22
27
  end
@@ -0,0 +1,108 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'utility/errors'
11
+ require 'utility/exception_tracking'
12
+
13
module Utility
  # Counts successes and errors reported by a caller and raises once errors
  # become too frequent. Three limits are checked after every noted error:
  #
  # * more than +max_consecutive_errors+ errors in a row,
  # * more than +max_errors+ errors in total,
  # * more than +max_error_ratio+ of the last +window_size+ outcomes being errors.
  #
  # #finalize additionally checks +max_error_ratio+ against all outcomes seen.
  # The most recent errors (at most +error_queue_size+) are kept in #error_queue.
  class ErrorMonitor
    # Base class of the errors raised by the monitor. +tripped_by+ holds the
    # error that pushed the monitor over a limit and is appended to the message.
    class MonitoringError < StandardError
      attr_accessor :tripped_by

      def initialize(message = nil, tripped_by: nil)
        # Plain truthiness instead of ActiveSupport's present?: this file does
        # not require active_support, and tripped_by is either nil or an error.
        details = tripped_by ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
        super("#{message}#{details}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors limit on the total number of errors
    # @param max_consecutive_errors limit on errors in a row
    # @param max_error_ratio limit on the share of errors (rolling window and #finalize)
    # @param window_size number of most recent outcomes in the rolling window;
    #   0 disables the rolling window check
    # @param error_queue_size number of most recent errors kept in #error_queue
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records one successful outcome and resets the consecutive error streak.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      record_in_window(false)
    end

    # Records one failed outcome, logs it at debug level, remembers it in
    # #error_queue and raises if any limit is now exceeded.
    #
    # @param error the error that occurred
    # @param id identifier stored with the error; defaults to the current epoch seconds
    # @raise [MonitoringError] one of the subclasses above when a limit is exceeded
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
      record_in_window(true)
      @last_error = error

      raise_if_necessary
    end

    # Checks the overall error ratio across every outcome noted so far.
    #
    # @raise [MaxErrorsInWindowExceededError] when errors / (errors + successes)
    #   is greater than +max_error_ratio+
    def finalize
      total_documents = @total_error_count + @success_count
      return unless total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio

      raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
    end

    private

    # Raises the error for the first exceeded limit, if any.
    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # Number of error slots currently set in the rolling window, as a Float so
    # that the ratio computed from it is not truncated by integer division.
    def num_errors_in_window
      @window_errors.count(&:itself).to_f
    end

    # Writes the latest outcome into the current window slot and advances the
    # slot index, wrapping at +window_size+. Successes overwrite the slot with
    # false so that an error drops out of the window once +window_size+ newer
    # outcomes have been recorded. No-op when the window is disabled
    # (window_size of 0), which also avoids a modulo-by-zero.
    def record_in_window(errored)
      return unless @window_size > 0

      @window_errors[@window_index] = errored
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ from inside a rescue of the last noted error, so that
    # Ruby records that error as +error.cause+.
    def raise_with_last_cause(error)
      raise @last_error
    rescue StandardError
      raise error
    end
  end
end
@@ -60,18 +60,6 @@ module Utility
60
60
  class JobDocumentLimitError < StandardError; end
61
61
  class JobClaimingError < StandardError; end
62
62
 
63
- class MonitoringError < StandardError
64
- attr_accessor :tripped_by
65
-
66
- def initialize(message = nil, tripped_by: nil)
67
- super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
68
- @tripped_by = tripped_by
69
- end
70
- end
71
- class MaxSuccessiveErrorsExceededError < MonitoringError; end
72
- class MaxErrorsExceededError < MonitoringError; end
73
- class MaxErrorsInWindowExceededError < MonitoringError; end
74
-
75
63
  class JobSyncNotPossibleYetError < StandardError
76
64
  attr_accessor :sync_will_be_possible_at
77
65
 
data/lib/utility.rb CHANGED
@@ -4,6 +4,8 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
7
9
  require 'utility/bulk_queue'
8
10
  require 'utility/common'
9
11
  require 'utility/constants'
@@ -11,9 +13,12 @@ require 'utility/cron'
11
13
  require 'utility/elasticsearch/index/mappings'
12
14
  require 'utility/elasticsearch/index/text_analysis_settings'
13
15
  require 'utility/environment'
16
+ require 'utility/error_monitor'
14
17
  require 'utility/errors'
15
18
  require 'utility/filtering'
16
19
  require 'utility/es_client'
17
20
  require 'utility/exception_tracking'
18
21
  require 'utility/extension_mapping_util'
19
22
  require 'utility/logger'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_utility
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221115T002329Z
4
+ version: 8.7.0.0.pre.20221117T004939Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-15 00:00:00.000000000 Z
11
+ date: 2022-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -119,6 +119,7 @@ files:
119
119
  - lib/utility/elasticsearch/index/mappings.rb
120
120
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
121
121
  - lib/utility/environment.rb
122
+ - lib/utility/error_monitor.rb
122
123
  - lib/utility/errors.rb
123
124
  - lib/utility/es_client.rb
124
125
  - lib/utility/exception_tracking.rb
@@ -129,9 +130,9 @@ homepage: https://github.com/elastic/connectors-ruby
129
130
  licenses:
130
131
  - Elastic-2.0
131
132
  metadata:
132
- revision: f506d5e5ebedfb0c6058d347d8ce22adc42e2cc0
133
- repository: git@github.com:elastic/ent-search-connectors.git
134
- post_install_message:
133
+ revision: 294214a26b0fe9a4347763b01de681c336e8daae
134
+ repository: https://github.com/elastic/connectors-ruby.git
135
+ post_install_message:
135
136
  rdoc_options: []
136
137
  require_paths:
137
138
  - lib
@@ -147,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
147
148
  version: 1.3.1
148
149
  requirements: []
149
150
  rubygems_version: 3.0.3.1
150
- signing_key:
151
+ signing_key:
151
152
  specification_version: 4
152
153
  summary: Gem containing shared Connector Services libraries
153
154
  test_files: []