connectors_utility 8.6.0.4.pre.20221115T002329Z → 8.7.0.0.pre.20221117T004939Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/core/connector_settings.rb +28 -5
- data/lib/core/elastic_connector_actions.rb +77 -55
- data/lib/utility/bulk_queue.rb +3 -1
- data/lib/utility/constants.rb +5 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility.rb +5 -0
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
|
4
|
+
data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
|
7
|
+
data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
|
@@ -23,14 +23,11 @@ module Core
|
|
23
23
|
|
24
24
|
DEFAULT_PAGE_SIZE = 100
|
25
25
|
|
26
|
-
# Error Classes
|
27
|
-
class ConnectorNotFoundError < StandardError; end
|
28
|
-
|
29
26
|
def self.fetch_by_id(connector_id)
|
30
27
|
es_response = ElasticConnectorActions.get_connector(connector_id)
|
31
|
-
|
28
|
+
return nil unless es_response[:found]
|
32
29
|
|
33
|
-
|
30
|
+
connectors_meta = ElasticConnectorActions.connectors_meta
|
34
31
|
new(es_response, connectors_meta)
|
35
32
|
end
|
36
33
|
|
@@ -122,6 +119,32 @@ module Core
|
|
122
119
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
123
120
|
end
|
124
121
|
|
122
|
+
def ready_for_sync?
|
123
|
+
Connectors::REGISTRY.registered?(service_type) &&
|
124
|
+
valid_index_name? &&
|
125
|
+
connector_status_allows_sync?
|
126
|
+
end
|
127
|
+
|
128
|
+
def running?
|
129
|
+
@elasticsearch_response[:_source][:last_sync_status] == Connectors::SyncStatus::IN_PROGRESS
|
130
|
+
end
|
131
|
+
|
132
|
+
def update_last_sync!(job)
|
133
|
+
doc = {
|
134
|
+
:last_sync_status => job.status,
|
135
|
+
:last_synced => Time.now,
|
136
|
+
:last_sync_error => job.error,
|
137
|
+
:error => job.error
|
138
|
+
}
|
139
|
+
|
140
|
+
if job.terminated?
|
141
|
+
doc[:last_indexed_document_count] = job[:indexed_document_count]
|
142
|
+
doc[:last_deleted_document_count] = job[:deleted_document_count]
|
143
|
+
end
|
144
|
+
|
145
|
+
Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
|
146
|
+
end
|
147
|
+
|
125
148
|
private
|
126
149
|
|
127
150
|
def initialize(es_response, connectors_meta)
|
@@ -132,11 +132,35 @@ module Core
|
|
132
132
|
update_connector_fields(connector_id, { :filtering => filtering })
|
133
133
|
end
|
134
134
|
|
135
|
-
def
|
135
|
+
def update_connector_sync_now(connector_id, sync_now)
|
136
|
+
doc = connector_with_concurrency_control(connector_id)
|
137
|
+
|
138
|
+
body = { sync_now: sync_now, last_synced: Time.now }
|
139
|
+
|
140
|
+
update_connector_fields(
|
141
|
+
connector_id,
|
142
|
+
body,
|
143
|
+
doc[:seq_no],
|
144
|
+
doc[:primary_term]
|
145
|
+
)
|
146
|
+
end
|
147
|
+
|
148
|
+
def update_connector_last_sync_status(connector_id, last_sync_status)
|
149
|
+
doc = connector_with_concurrency_control(connector_id)
|
150
|
+
|
151
|
+
update_connector_fields(
|
152
|
+
connector_id,
|
153
|
+
{ last_sync_status: last_sync_status },
|
154
|
+
doc[:seq_no],
|
155
|
+
doc[:primary_term]
|
156
|
+
)
|
157
|
+
end
|
158
|
+
|
159
|
+
def connector_with_concurrency_control(connector_id)
|
136
160
|
seq_no = nil
|
137
161
|
primary_term = nil
|
138
|
-
|
139
|
-
|
162
|
+
|
163
|
+
doc = client.get(
|
140
164
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
141
165
|
:id => connector_id,
|
142
166
|
:ignore => 404,
|
@@ -144,42 +168,31 @@ module Core
|
|
144
168
|
).tap do |response|
|
145
169
|
seq_no = response['_seq_no']
|
146
170
|
primary_term = response['_primary_term']
|
147
|
-
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
148
|
-
end
|
149
|
-
if sync_in_progress
|
150
|
-
raise JobAlreadyRunningError.new(connector_id)
|
151
171
|
end
|
152
|
-
update_connector_fields(
|
153
|
-
connector_id,
|
154
|
-
{ :sync_now => false,
|
155
|
-
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
156
|
-
:last_synced => Time.now },
|
157
|
-
seq_no,
|
158
|
-
primary_term
|
159
|
-
)
|
160
172
|
|
173
|
+
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
174
|
+
end
|
175
|
+
|
176
|
+
def create_job(connector_settings:)
|
161
177
|
body = {
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
|
167
|
-
|
168
|
-
:
|
169
|
-
:
|
178
|
+
status: Connectors::SyncStatus::PENDING,
|
179
|
+
created_at: Time.now,
|
180
|
+
last_seen: Time.now,
|
181
|
+
connector: {
|
182
|
+
id: connector_settings.id,
|
183
|
+
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
184
|
+
index_name: connector_settings.index_name,
|
185
|
+
language: connector_settings[:language],
|
186
|
+
pipeline: connector_settings[:pipeline],
|
187
|
+
service_type: connector_settings.service_type
|
170
188
|
}
|
171
189
|
}
|
172
190
|
|
173
|
-
index_response = client.index(:
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
:id => index_response['_id'],
|
179
|
-
:ignore => 404
|
180
|
-
).with_indifferent_access
|
181
|
-
end
|
182
|
-
raise JobNotCreatedError.new(connector_id, index_response)
|
191
|
+
index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
|
192
|
+
|
193
|
+
return index_response if index_response['result'] == 'created'
|
194
|
+
|
195
|
+
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
183
196
|
end
|
184
197
|
|
185
198
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
@@ -507,31 +520,15 @@ module Core
|
|
507
520
|
end
|
508
521
|
|
509
522
|
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
:refresh => true,
|
516
|
-
:retry_on_conflict => 3
|
517
|
-
}
|
518
|
-
# seq_no and primary_term are used for optimistic concurrency control
|
519
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
520
|
-
if seq_no && primary_term
|
521
|
-
update_args[:if_seq_no] = seq_no
|
522
|
-
update_args[:if_primary_term] = primary_term
|
523
|
-
update_args.delete(:retry_on_conflict)
|
524
|
-
end
|
525
|
-
begin
|
526
|
-
client.update(update_args)
|
527
|
-
rescue Elastic::Transport::Transport::Errors::Conflict
|
528
|
-
# VersionConflictException
|
529
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
530
|
-
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
531
|
-
end
|
523
|
+
update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
|
524
|
+
end
|
525
|
+
|
526
|
+
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
|
527
|
+
update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
|
532
528
|
end
|
533
529
|
|
534
530
|
def document_count(index_name)
|
531
|
+
client.indices.refresh(:index => index_name)
|
535
532
|
client.count(:index => index_name)['count']
|
536
533
|
end
|
537
534
|
|
@@ -563,6 +560,31 @@ module Core
|
|
563
560
|
filter.deep_merge!(new_validation_state)
|
564
561
|
end
|
565
562
|
end
|
563
|
+
|
564
|
+
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
|
565
|
+
return if doc.empty?
|
566
|
+
update_args = {
|
567
|
+
:index => index,
|
568
|
+
:id => id,
|
569
|
+
:body => { :doc => doc },
|
570
|
+
:refresh => true,
|
571
|
+
:retry_on_conflict => 3
|
572
|
+
}
|
573
|
+
|
574
|
+
if seq_no && primary_term
|
575
|
+
update_args[:if_seq_no] = seq_no
|
576
|
+
update_args[:if_primary_term] = primary_term
|
577
|
+
update_args.delete(:retry_on_conflict)
|
578
|
+
end
|
579
|
+
|
580
|
+
begin
|
581
|
+
client.update(update_args)
|
582
|
+
rescue Elastic::Transport::Transport::Errors::Conflict
|
583
|
+
# VersionConflictException
|
584
|
+
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
585
|
+
raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
|
586
|
+
end
|
587
|
+
end
|
566
588
|
end
|
567
589
|
end
|
568
590
|
end
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
|
7
7
|
require 'json'
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Utility
|
10
12
|
class BulkQueue
|
11
13
|
class QueueOverflowError < StandardError; end
|
12
14
|
|
13
15
|
# 500 items or 5MB
|
14
|
-
def initialize(operation_count_threshold =
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
15
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
16
18
|
@size_threshold = size_threshold.freeze
|
17
19
|
|
data/lib/utility/constants.rb
CHANGED
@@ -18,5 +18,10 @@ module Utility
|
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
19
|
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
20
|
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
21
26
|
end
|
22
27
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'utility/errors'
|
11
|
+
require 'utility/exception_tracking'
|
12
|
+
|
13
|
+
module Utility
|
14
|
+
class ErrorMonitor
|
15
|
+
class MonitoringError < StandardError
|
16
|
+
attr_accessor :tripped_by
|
17
|
+
|
18
|
+
def initialize(message = nil, tripped_by: nil)
|
19
|
+
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
20
|
+
@tripped_by = tripped_by
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
25
|
+
class MaxErrorsExceededError < MonitoringError; end
|
26
|
+
class MaxErrorsInWindowExceededError < MonitoringError; end
|
27
|
+
|
28
|
+
attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
|
29
|
+
|
30
|
+
def initialize(
|
31
|
+
max_errors: 1000,
|
32
|
+
max_consecutive_errors: 10,
|
33
|
+
max_error_ratio: 0.15,
|
34
|
+
window_size: 100,
|
35
|
+
error_queue_size: 20
|
36
|
+
)
|
37
|
+
@max_errors = max_errors
|
38
|
+
@max_consecutive_errors = max_consecutive_errors
|
39
|
+
@max_error_ratio = max_error_ratio
|
40
|
+
@window_size = window_size
|
41
|
+
@total_error_count = 0
|
42
|
+
@success_count = 0
|
43
|
+
@consecutive_error_count = 0
|
44
|
+
@window_errors = Array.new(window_size) { false }
|
45
|
+
@window_index = 0
|
46
|
+
@last_error = nil
|
47
|
+
@error_queue_size = error_queue_size
|
48
|
+
@error_queue = []
|
49
|
+
end
|
50
|
+
|
51
|
+
def note_success
|
52
|
+
@consecutive_error_count = 0
|
53
|
+
@success_count += 1
|
54
|
+
increment_window_index
|
55
|
+
end
|
56
|
+
|
57
|
+
def note_error(error, id: Time.now.to_i)
|
58
|
+
stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
|
59
|
+
error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
|
60
|
+
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
|
+
@total_error_count += 1
|
62
|
+
@consecutive_error_count += 1
|
63
|
+
@window_errors[@window_index] = true
|
64
|
+
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
65
|
+
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
66
|
+
increment_window_index
|
67
|
+
@last_error = error
|
68
|
+
|
69
|
+
raise_if_necessary
|
70
|
+
end
|
71
|
+
|
72
|
+
def finalize
|
73
|
+
total_documents = @total_error_count + @success_count
|
74
|
+
if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
|
75
|
+
raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def raise_if_necessary
|
82
|
+
error =
|
83
|
+
if @consecutive_error_count > @max_consecutive_errors
|
84
|
+
MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
|
85
|
+
elsif @total_error_count > @max_errors
|
86
|
+
MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
|
87
|
+
elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
|
88
|
+
MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
|
89
|
+
end
|
90
|
+
|
91
|
+
raise_with_last_cause(error) if error
|
92
|
+
end
|
93
|
+
|
94
|
+
def num_errors_in_window
|
95
|
+
@window_errors.count(&:itself).to_f
|
96
|
+
end
|
97
|
+
|
98
|
+
def increment_window_index
|
99
|
+
@window_index = (@window_index + 1) % @window_size
|
100
|
+
end
|
101
|
+
|
102
|
+
def raise_with_last_cause(error)
|
103
|
+
raise @last_error
|
104
|
+
rescue StandardError
|
105
|
+
raise error
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/lib/utility/errors.rb
CHANGED
@@ -60,18 +60,6 @@ module Utility
|
|
60
60
|
class JobDocumentLimitError < StandardError; end
|
61
61
|
class JobClaimingError < StandardError; end
|
62
62
|
|
63
|
-
class MonitoringError < StandardError
|
64
|
-
attr_accessor :tripped_by
|
65
|
-
|
66
|
-
def initialize(message = nil, tripped_by: nil)
|
67
|
-
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
68
|
-
@tripped_by = tripped_by
|
69
|
-
end
|
70
|
-
end
|
71
|
-
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
72
|
-
class MaxErrorsExceededError < MonitoringError; end
|
73
|
-
class MaxErrorsInWindowExceededError < MonitoringError; end
|
74
|
-
|
75
63
|
class JobSyncNotPossibleYetError < StandardError
|
76
64
|
attr_accessor :sync_will_be_possible_at
|
77
65
|
|
data/lib/utility.rb
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
# !!!!!!!!
|
8
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
7
9
|
require 'utility/bulk_queue'
|
8
10
|
require 'utility/common'
|
9
11
|
require 'utility/constants'
|
@@ -11,9 +13,12 @@ require 'utility/cron'
|
|
11
13
|
require 'utility/elasticsearch/index/mappings'
|
12
14
|
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
15
|
require 'utility/environment'
|
16
|
+
require 'utility/error_monitor'
|
14
17
|
require 'utility/errors'
|
15
18
|
require 'utility/filtering'
|
16
19
|
require 'utility/es_client'
|
17
20
|
require 'utility/exception_tracking'
|
18
21
|
require 'utility/extension_mapping_util'
|
19
22
|
require 'utility/logger'
|
23
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
24
|
+
# !!!!!!!!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221115T002329Z
|
4
|
+
version: 8.7.0.0.pre.20221117T004939Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-15 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- lib/utility/elasticsearch/index/mappings.rb
|
120
120
|
- lib/utility/elasticsearch/index/text_analysis_settings.rb
|
121
121
|
- lib/utility/environment.rb
|
122
|
+
- lib/utility/error_monitor.rb
|
122
123
|
- lib/utility/errors.rb
|
123
124
|
- lib/utility/es_client.rb
|
124
125
|
- lib/utility/exception_tracking.rb
|
@@ -129,9 +130,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
129
130
|
licenses:
|
130
131
|
- Elastic-2.0
|
131
132
|
metadata:
|
132
|
-
revision:
|
133
|
-
repository:
|
134
|
-
post_install_message:
|
133
|
+
revision: 294214a26b0fe9a4347763b01de681c336e8daae
|
134
|
+
repository: https://github.com/elastic/connectors-ruby.git
|
135
|
+
post_install_message:
|
135
136
|
rdoc_options: []
|
136
137
|
require_paths:
|
137
138
|
- lib
|
@@ -147,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
148
|
version: 1.3.1
|
148
149
|
requirements: []
|
149
150
|
rubygems_version: 3.0.3.1
|
150
|
-
signing_key:
|
151
|
+
signing_key:
|
151
152
|
specification_version: 4
|
152
153
|
summary: Gem containing shared Connector Services libraries
|
153
154
|
test_files: []
|