connectors_utility 8.6.0.4.pre.20221115T002329Z → 8.6.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/core/connector_job.rb +240 -0
- data/lib/core/connector_settings.rb +35 -5
- data/lib/core/elastic_connector_actions.rb +88 -86
- data/lib/core/scheduler.rb +7 -7
- data/lib/utility/bulk_queue.rb +3 -1
- data/lib/utility/constants.rb +5 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility.rb +5 -0
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd3d8a98ffaf8965434e1c01467c4ff30d87a1b25e414d5ce36f8a8529178053
|
4
|
+
data.tar.gz: 07d54a470c31ba311aeee4aca41401bca11178d20ed7de72c8dadf7926e910d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eddc02838146053fa61173a5dc6060eb62c92c70930bd5c1338586572af08078288455c65fb2301796d99b338b996ec889966e535bbceaf9f1f39bac6c9888bc
|
7
|
+
data.tar.gz: e0ec181ccc3cb8dd59f123003d873c1218f2e75c6d91569efe075330d913d6bb366c4bcfbc389844b8040a10e22e5b8c797525379d042cb81a2fa2c7e3af08a4
|
@@ -0,0 +1,240 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
+
require 'connectors/sync_status'
|
11
|
+
require 'core/connector_settings'
|
12
|
+
require 'core/elastic_connector_actions'
|
13
|
+
require 'utility'
|
14
|
+
|
15
|
+
module Core
  # Read/write wrapper around a single sync-job document as returned by
  # Elasticsearch (a hash carrying :_id, :_source and, for GET responses,
  # :found / :_seq_no / :_primary_term).
  #
  # Class methods query the job index through ElasticConnectorActions and
  # return ConnectorJob instances; instance methods expose fields of the
  # job document and push status updates back to the index.
  class ConnectorJob
    # Default number of jobs requested per search page.
    DEFAULT_PAGE_SIZE = 100
    # Age of `last_seen`, in seconds, after which an active job counts as stuck
    # (interpolated into the "now-<N>s" range bound in .stuck_jobs).
    STUCK_THRESHOLD = 60

    # Loads one job by id.
    #
    # @return [ConnectorJob, nil] nil when the response is not flagged :found
    def self.fetch_by_id(job_id)
      es_response = ElasticConnectorActions.get_job(job_id)
      return nil unless es_response[:found]

      new(es_response)
    end

    # Jobs whose status is one of Connectors::SyncStatus::PENDING_STATUSES,
    # optionally restricted to the given connector ids.
    #
    # @param connectors_ids [Array] connector ids to restrict to; empty means no restriction
    # @param page_size [Integer] page size used while paginating
    # @return [Array<ConnectorJob>]
    def self.pending_jobs(connectors_ids: [], page_size: DEFAULT_PAGE_SIZE)
      status_term = { status: Connectors::SyncStatus::PENDING_STATUSES }
      query = { bool: { must: [{ terms: status_term }] } }
      query[:bool][:must] << { terms: { 'connector.id' => connectors_ids } } unless connectors_ids.empty?

      fetch_jobs_by_query(query, page_size)
    end

    # Jobs whose connector id does not match any connector returned by
    # ConnectorSettings.fetch_all_connectors.
    #
    # @return [Array<ConnectorJob>]
    def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
      connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
      query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
      fetch_jobs_by_query(query, page_size)
    end

    # Deletes the given jobs with a single delete-by-query on their ids.
    #
    # @param jobs [Array<ConnectorJob>]
    def self.delete_jobs(jobs)
      query = { terms: { '_id': jobs.map(&:id) } }
      ElasticConnectorActions.delete_jobs_by_query(query)
    end

    # Jobs in an active status whose `last_seen` is at least STUCK_THRESHOLD
    # seconds old. Scoped to +connector_id+ when given, otherwise to the ids
    # returned by ConnectorSettings.fetch_native_connectors.
    #
    # @return [Array<ConnectorJob>]
    def self.stuck_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
      connector_ids = connector_id ? [connector_id] : ConnectorSettings.fetch_native_connectors.map(&:id)
      query = {
        bool: {
          filter: [
            { terms: { 'connector.id': connector_ids } },
            { terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
            { range: { last_seen: { lte: "now-#{STUCK_THRESHOLD}s" } } }
          ]
        }
      }
      fetch_jobs_by_query(query, page_size)
    end

    # Placeholder: currently does nothing and returns nil.
    def self.enqueue(_connector_id)
      nil
    end

    # Pages through ElasticConnectorActions.search_jobs and wraps every hit.
    #
    # Pagination stops once as many jobs were collected as the reported total,
    # or as soon as a page comes back empty. The empty-page check prevents an
    # endless loop when the reported total can no longer be reached (for
    # example when jobs disappear between two page requests).
    #
    # NOTE: this method was originally declared below `private`, which has no
    # effect on `def self.` methods; it therefore stays publicly callable.
    #
    # @param query [Hash] Elasticsearch query body
    # @param page_size [Integer] number of hits requested per page
    # @return [Array<ConnectorJob>]
    def self.fetch_jobs_by_query(query, page_size)
      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_jobs(query, page_size, offset)

        hits = response.dig('hits', 'hits') || []
        total = response.dig('hits', 'total', 'value') || 0
        results.concat(hits.map { |hit| new(hit) })
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    # @return the `_id` of the underlying Elasticsearch document
    def id
      @elasticsearch_response[:_id]
    end

    # Reads a top-level property from the document's `_source`.
    def [](property_name)
      @elasticsearch_response[:_source][property_name]
    end

    def error
      self[:error]
    end

    def status
      self[:status]
    end

    def in_progress?
      status == Connectors::SyncStatus::IN_PROGRESS
    end

    def canceling?
      status == Connectors::SyncStatus::CANCELING
    end

    def suspended?
      status == Connectors::SyncStatus::SUSPENDED
    end

    def canceled?
      status == Connectors::SyncStatus::CANCELED
    end

    def pending?
      Connectors::SyncStatus::PENDING_STATUSES.include?(status)
    end

    def active?
      Connectors::SyncStatus::ACTIVE_STATUSES.include?(status)
    end

    def terminated?
      Connectors::SyncStatus::TERMINAL_STATUSES.include?(status)
    end

    # The `connector` object stored inside the job document, or an empty
    # hash when the job carries none.
    def connector_snapshot
      self[:connector] || {}
    end

    def connector_id
      connector_snapshot[:id]
    end

    def index_name
      connector_snapshot[:index_name]
    end

    def language
      connector_snapshot[:language]
    end

    def service_type
      connector_snapshot[:service_type]
    end

    def configuration
      connector_snapshot[:configuration]
    end

    def filtering
      connector_snapshot[:filtering]
    end

    def pipeline
      connector_snapshot[:pipeline]
    end

    # The connector looked up by this job's connector id via
    # ConnectorSettings.fetch_by_id; a truthy result is memoized.
    def connector
      @connector ||= ConnectorSettings.fetch_by_id(connector_id)
    end

    # Refreshes `last_seen` and merges the given ingestion stats into the job
    # document; `metadata` is written only when +connector_metadata+ is non-empty.
    def update_metadata(ingestion_stats = {}, connector_metadata = {})
      ingestion_stats ||= {}
      doc = { :last_seen => Time.now }.merge(ingestion_stats)
      doc[:metadata] = connector_metadata if connector_metadata&.any?
      ElasticConnectorActions.update_job_fields(id, doc)
    end

    # Terminates the job with status COMPLETED.
    def done!(ingestion_stats = {}, connector_metadata = {})
      terminate!(Connectors::SyncStatus::COMPLETED, nil, ingestion_stats, connector_metadata)
    end

    # Terminates the job with status ERROR and the given error message.
    def error!(message, ingestion_stats = {}, connector_metadata = {})
      terminate!(Connectors::SyncStatus::ERROR, message, ingestion_stats, connector_metadata)
    end

    # Terminates the job with status CANCELED.
    def cancel!(ingestion_stats = {}, connector_metadata = {})
      terminate!(Connectors::SyncStatus::CANCELED, nil, ingestion_stats, connector_metadata)
    end

    # Re-reads the job and yields the fresh response together with its
    # `_seq_no` and `_primary_term`, so the block can perform a conditional
    # (optimistic concurrency) update.
    def with_concurrency_control
      response = ElasticConnectorActions.get_job(id)

      yield response, response['_seq_no'], response['_primary_term']
    end

    # Marks the job IN_PROGRESS, stamping start time, last_seen and the
    # worker's hostname. The update is conditional on the sequence number and
    # primary term read just before it.
    def make_running!
      with_concurrency_control do |es_doc, seq_no, primary_term|
        now = Time.now
        doc = {
          status: Connectors::SyncStatus::IN_PROGRESS,
          started_at: now,
          last_seen: now,
          # NOTE(review): Socket is used without `require 'socket'` in this file -
          # presumably loaded by a dependency; confirm.
          worker_hostname: Socket.gethostname
        }

        ElasticConnectorActions.update_job_fields(es_doc[:_id], doc, seq_no, primary_term)
      end
    end

    # @return the raw `_source` of the underlying Elasticsearch document
    def es_source
      @elasticsearch_response[:_source]
    end

    private

    def initialize(es_response)
      # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
      @elasticsearch_response = es_response.with_indifferent_access
    end

    # Writes the terminal state of the job: status, error, timestamps, the
    # supplied ingestion stats plus the current document count of the
    # job's index.
    #
    # The caller's +ingestion_stats+ hash is left untouched; the total
    # document count is merged into a copy.
    def terminate!(status, error = nil, ingestion_stats = {}, connector_metadata = {})
      now = Time.now
      stats = (ingestion_stats || {}).merge(
        :total_document_count => ElasticConnectorActions.document_count(index_name)
      )
      doc = {
        :last_seen => now,
        :completed_at => now,
        :status => status,
        :error => error
      }.merge(stats)
      doc[:canceled_at] = now if status == Connectors::SyncStatus::CANCELED
      doc[:metadata] = connector_metadata if connector_metadata&.any?
      ElasticConnectorActions.update_job_fields(id, doc)
    end

    def seq_no
      @elasticsearch_response[:_seq_no]
    end

    def primary_term
      @elasticsearch_response[:_primary_term]
    end
  end
end
|
@@ -23,14 +23,11 @@ module Core
|
|
23
23
|
|
24
24
|
DEFAULT_PAGE_SIZE = 100
|
25
25
|
|
26
|
-
# Error Classes
|
27
|
-
class ConnectorNotFoundError < StandardError; end
|
28
|
-
|
29
26
|
def self.fetch_by_id(connector_id)
|
30
27
|
es_response = ElasticConnectorActions.get_connector(connector_id)
|
31
|
-
|
28
|
+
return nil unless es_response[:found]
|
32
29
|
|
33
|
-
|
30
|
+
connectors_meta = ElasticConnectorActions.connectors_meta
|
34
31
|
new(es_response, connectors_meta)
|
35
32
|
end
|
36
33
|
|
@@ -52,6 +49,11 @@ module Core
|
|
52
49
|
fetch_connectors_by_query(query, page_size)
|
53
50
|
end
|
54
51
|
|
52
|
+
# Fetches every connector by running a `match_all` query through
# fetch_connectors_by_query with the given page size.
def self.fetch_all_connectors(page_size = DEFAULT_PAGE_SIZE)
  fetch_connectors_by_query({ match_all: {} }, page_size)
end
|
56
|
+
|
55
57
|
def id
|
56
58
|
@elasticsearch_response[:_id]
|
57
59
|
end
|
@@ -122,6 +124,34 @@ module Core
|
|
122
124
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
123
125
|
end
|
124
126
|
|
127
|
+
# A connector is ready for sync when its service type is registered in
# Connectors::REGISTRY, its index name is valid and its status allows a sync.
# Checks are evaluated in that order and short-circuit on the first falsy one.
def ready_for_sync?
  service_registered = Connectors::REGISTRY.registered?(service_type)

  service_registered && valid_index_name? && connector_status_allows_sync?
end
|
132
|
+
|
133
|
+
# True when the connector document's `last_sync_status` is IN_PROGRESS.
def running?
  source = @elasticsearch_response[:_source]
  source[:last_sync_status] == Connectors::SyncStatus::IN_PROGRESS
end
|
136
|
+
|
137
|
+
# Copies the outcome of +job+ onto the connector document: last sync status,
# last sync time and error. Document counts are copied only when the job is
# in a terminal status.
#
# @param job [#status, #error, #terminated?, #[], nil] the finished job; when
#   nil the connector is still updated (with an ERROR status) so it does not
#   stay stuck at in_progress
def update_last_sync!(job)
  # if job is nil, connector still needs to be updated, to avoid it stuck at in_progress
  job_status = job&.status || Connectors::SyncStatus::ERROR
  job_error = job.nil? ? "Couldn't find the job" : job.error
  # an ERROR status must always carry some error text
  job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
  doc = {
    :last_sync_status => job_status,
    :last_synced => Time.now,
    :last_sync_error => job_error,
    :error => job_error
  }
  if job&.terminated?
    doc[:last_indexed_document_count] = job[:indexed_document_count]
    doc[:last_deleted_document_count] = job[:deleted_document_count]
  end
  Core::ElasticConnectorActions.update_connector_fields(id, doc)
end
|
154
|
+
|
125
155
|
private
|
126
156
|
|
127
157
|
def initialize(es_response, connectors_meta)
|
@@ -91,6 +91,17 @@ module Core
|
|
91
91
|
)
|
92
92
|
end
|
93
93
|
|
94
|
+
# Runs a delete-by-query against the job index using the given query.
def delete_jobs_by_query(query)
  request = {
    :index => Utility::Constants::JOB_INDEX,
    :body => { :query => query }
  }
  client.delete_by_query(**request)
end
|
100
|
+
|
101
|
+
# Deletes the given indices, passing `ignore_unavailable: true` to the
# indices delete API.
def delete_indices(indices)
  indices_api = client.indices
  indices_api.delete(:index => indices, :ignore_unavailable => true)
end
|
104
|
+
|
94
105
|
def update_connector_configuration(connector_id, configuration)
|
95
106
|
update_connector_fields(connector_id, :configuration => configuration)
|
96
107
|
end
|
@@ -132,11 +143,35 @@ module Core
|
|
132
143
|
update_connector_fields(connector_id, { :filtering => filtering })
|
133
144
|
end
|
134
145
|
|
135
|
-
def
|
146
|
+
# Sets the connector's `sync_now` flag and stamps `last_synced` with the
# current time. The update is sent with the sequence number and primary term
# obtained from connector_with_concurrency_control.
def update_connector_sync_now(connector_id, sync_now)
  versioned = connector_with_concurrency_control(connector_id)
  fields = { sync_now: sync_now, last_synced: Time.now }
  seq_no, primary_term = versioned.values_at(:seq_no, :primary_term)

  update_connector_fields(connector_id, fields, seq_no, primary_term)
end
|
158
|
+
|
159
|
+
# Writes `last_sync_status` on the connector document. The update is sent
# with the sequence number and primary term obtained from
# connector_with_concurrency_control.
def update_connector_last_sync_status(connector_id, last_sync_status)
  versioned = connector_with_concurrency_control(connector_id)
  seq_no, primary_term = versioned.values_at(:seq_no, :primary_term)

  update_connector_fields(connector_id, { last_sync_status: last_sync_status }, seq_no, primary_term)
end
|
169
|
+
|
170
|
+
def connector_with_concurrency_control(connector_id)
|
136
171
|
seq_no = nil
|
137
172
|
primary_term = nil
|
138
|
-
|
139
|
-
|
173
|
+
|
174
|
+
doc = client.get(
|
140
175
|
:index => Utility::Constants::CONNECTORS_INDEX,
|
141
176
|
:id => connector_id,
|
142
177
|
:ignore => 404,
|
@@ -144,42 +179,31 @@ module Core
|
|
144
179
|
).tap do |response|
|
145
180
|
seq_no = response['_seq_no']
|
146
181
|
primary_term = response['_primary_term']
|
147
|
-
sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
|
148
|
-
end
|
149
|
-
if sync_in_progress
|
150
|
-
raise JobAlreadyRunningError.new(connector_id)
|
151
182
|
end
|
152
|
-
update_connector_fields(
|
153
|
-
connector_id,
|
154
|
-
{ :sync_now => false,
|
155
|
-
:last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
|
156
|
-
:last_synced => Time.now },
|
157
|
-
seq_no,
|
158
|
-
primary_term
|
159
|
-
)
|
160
183
|
|
184
|
+
{ doc: doc, seq_no: seq_no, primary_term: primary_term }
|
185
|
+
end
|
186
|
+
|
187
|
+
def create_job(connector_settings:)
|
161
188
|
body = {
|
162
|
-
:
|
163
|
-
:
|
164
|
-
:
|
165
|
-
:
|
166
|
-
|
167
|
-
|
168
|
-
:
|
169
|
-
:
|
189
|
+
status: Connectors::SyncStatus::PENDING,
|
190
|
+
created_at: Time.now,
|
191
|
+
last_seen: Time.now,
|
192
|
+
connector: {
|
193
|
+
id: connector_settings.id,
|
194
|
+
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
195
|
+
index_name: connector_settings.index_name,
|
196
|
+
language: connector_settings[:language],
|
197
|
+
pipeline: connector_settings[:pipeline],
|
198
|
+
service_type: connector_settings.service_type
|
170
199
|
}
|
171
200
|
}
|
172
201
|
|
173
|
-
index_response = client.index(:
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
:id => index_response['_id'],
|
179
|
-
:ignore => 404
|
180
|
-
).with_indifferent_access
|
181
|
-
end
|
182
|
-
raise JobNotCreatedError.new(connector_id, index_response)
|
202
|
+
index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
|
203
|
+
|
204
|
+
return index_response if index_response['result'] == 'created'
|
205
|
+
|
206
|
+
raise JobNotCreatedError.new(connector_settings.id, index_response)
|
183
207
|
end
|
184
208
|
|
185
209
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
@@ -207,37 +231,6 @@ module Core
|
|
207
231
|
update_connector_fields(connector_id, body)
|
208
232
|
end
|
209
233
|
|
210
|
-
def update_sync(job_id, metadata)
|
211
|
-
body = {
|
212
|
-
:doc => { :last_seen => Time.now }.merge(metadata)
|
213
|
-
}
|
214
|
-
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
215
|
-
end
|
216
|
-
|
217
|
-
def complete_sync(connector_id, job_id, metadata, error)
|
218
|
-
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
219
|
-
|
220
|
-
metadata ||= {}
|
221
|
-
|
222
|
-
update_connector_fields(connector_id,
|
223
|
-
:last_sync_status => sync_status,
|
224
|
-
:last_sync_error => error,
|
225
|
-
:error => error,
|
226
|
-
:last_synced => Time.now,
|
227
|
-
:last_indexed_document_count => metadata[:indexed_document_count],
|
228
|
-
:last_deleted_document_count => metadata[:deleted_document_count])
|
229
|
-
|
230
|
-
body = {
|
231
|
-
:doc => {
|
232
|
-
:status => sync_status,
|
233
|
-
:completed_at => Time.now,
|
234
|
-
:last_seen => Time.now,
|
235
|
-
:error => error
|
236
|
-
}.merge(metadata)
|
237
|
-
}
|
238
|
-
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
239
|
-
end
|
240
|
-
|
241
234
|
def fetch_document_ids(index_name)
|
242
235
|
page_size = 1000
|
243
236
|
result = []
|
@@ -507,31 +500,15 @@ module Core
|
|
507
500
|
end
|
508
501
|
|
509
502
|
def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
:refresh => true,
|
516
|
-
:retry_on_conflict => 3
|
517
|
-
}
|
518
|
-
# seq_no and primary_term are used for optimistic concurrency control
|
519
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
|
520
|
-
if seq_no && primary_term
|
521
|
-
update_args[:if_seq_no] = seq_no
|
522
|
-
update_args[:if_primary_term] = primary_term
|
523
|
-
update_args.delete(:retry_on_conflict)
|
524
|
-
end
|
525
|
-
begin
|
526
|
-
client.update(update_args)
|
527
|
-
rescue Elastic::Transport::Transport::Errors::Conflict
|
528
|
-
# VersionConflictException
|
529
|
-
# see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
|
530
|
-
raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
|
531
|
-
end
|
503
|
+
update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
|
504
|
+
end
|
505
|
+
|
506
|
+
# Partial update of a document in the job index; delegates to
# update_doc_fields, forwarding the optional sequence number / primary term.
def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
  job_index = Utility::Constants::JOB_INDEX
  update_doc_fields(job_index, job_id, doc, seq_no, primary_term)
end
|
533
509
|
|
534
510
|
# Refreshes the index, then returns the `count` field of the count API response.
def document_count(index_name)
  client.indices.refresh(:index => index_name)
  count_response = client.count(:index => index_name)
  count_response['count']
end
|
537
514
|
|
@@ -563,6 +540,31 @@ module Core
|
|
563
540
|
filter.deep_merge!(new_validation_state)
|
564
541
|
end
|
565
542
|
end
|
543
|
+
|
544
|
+
# Partial update of document +id+ in +index+ with the fields in +doc+.
# Does nothing when +doc+ is empty.
#
# When both +seq_no+ and +primary_term+ are given they are sent as
# `if_seq_no` / `if_primary_term` (optimistic concurrency control) and no
# `retry_on_conflict` is sent; otherwise the update is sent with
# `retry_on_conflict: 3`.
#
# Raises ConnectorVersionChangedError when the client reports a conflict.
def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
  return if doc.empty?

  update_args = { :index => index, :id => id, :body => { :doc => doc }, :refresh => true }
  if seq_no && primary_term
    update_args[:if_seq_no] = seq_no
    update_args[:if_primary_term] = primary_term
  else
    update_args[:retry_on_conflict] = 3
  end

  begin
    client.update(update_args)
  rescue Elastic::Transport::Transport::Errors::Conflict
    # VersionConflictException
    # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
    raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
  end
end
|
566
568
|
end
|
567
569
|
end
|
568
570
|
end
|
data/lib/core/scheduler.rb
CHANGED
@@ -90,13 +90,6 @@ module Core
|
|
90
90
|
return false
|
91
91
|
end
|
92
92
|
|
93
|
-
# We want to sync when sync never actually happened
|
94
|
-
last_synced = connector_settings[:last_synced]
|
95
|
-
if last_synced.nil? || last_synced.empty?
|
96
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
97
|
-
return true
|
98
|
-
end
|
99
|
-
|
100
93
|
current_schedule = scheduling_settings[:interval]
|
101
94
|
|
102
95
|
# Don't sync if there is no actual scheduling interval
|
@@ -119,6 +112,13 @@ module Core
|
|
119
112
|
return false
|
120
113
|
end
|
121
114
|
|
115
|
+
# We want to sync when sync never actually happened
|
116
|
+
last_synced = connector_settings[:last_synced]
|
117
|
+
if last_synced.nil? || last_synced.empty?
|
118
|
+
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
119
|
+
return true
|
120
|
+
end
|
121
|
+
|
122
122
|
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
123
|
|
124
124
|
# Sync if next trigger for the connector is in past
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
|
7
7
|
require 'json'
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Utility
|
10
12
|
class BulkQueue
|
11
13
|
class QueueOverflowError < StandardError; end
|
12
14
|
|
13
15
|
# 500 items or 5MB
|
14
|
-
def initialize(operation_count_threshold =
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
15
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
16
18
|
@size_threshold = size_threshold.freeze
|
17
19
|
|
data/lib/utility/constants.rb
CHANGED
@@ -18,5 +18,10 @@ module Utility
|
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
19
|
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
20
|
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
21
26
|
end
|
22
27
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'utility/errors'
|
11
|
+
require 'utility/exception_tracking'
|
12
|
+
|
13
|
+
module Utility
  # Counts successes and errors reported through #note_success / #note_error
  # and raises a MonitoringError subclass once too many errors were seen:
  # too many in a row, too many in total, or too high a ratio within a
  # sliding window of the most recent outcomes.
  class ErrorMonitor
    # Base class of all errors raised by the monitor. Carries the error that
    # tripped the monitor in #tripped_by and appends it to the message.
    class MonitoringError < StandardError
      attr_accessor :tripped_by

      def initialize(message = nil, tripped_by: nil)
        # Plain Ruby check (no ActiveSupport `present?` needed): the suffix is
        # added whenever the tripping object can describe itself.
        suffix = tripped_by.respond_to?(:message) ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
        super("#{message}#{suffix}")
        @tripped_by = tripped_by
      end
    end

    class MaxSuccessiveErrorsExceededError < MonitoringError; end
    class MaxErrorsExceededError < MonitoringError; end
    class MaxErrorsInWindowExceededError < MonitoringError; end

    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param max_errors [Integer] total errors tolerated before raising
    # @param max_consecutive_errors [Integer] errors in a row tolerated before raising
    # @param max_error_ratio [Float] tolerated share of errors, applied to the
    #   sliding window on every error and to all documents in #finalize
    # @param window_size [Integer] number of most recent outcomes kept in the
    #   sliding window; 0 disables the window check
    # @param error_queue_size [Integer] number of most recent errors kept in #error_queue
    def initialize(
      max_errors: 1000,
      max_consecutive_errors: 10,
      max_error_ratio: 0.15,
      window_size: 100,
      error_queue_size: 20
    )
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records a successful document: resets the consecutive-error streak and
    # stores a "no error" outcome in the sliding window.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      track_window_outcome(false)
    end

    # Records a failed document, logs it at debug level, keeps it in the
    # bounded #error_queue and raises if any threshold is now exceeded.
    #
    # @param error [Exception] the error that occurred
    # @param id identifier used in the log line and the queued DocumentError
    # @raise [MonitoringError] when a threshold is exceeded
    def note_error(error, id: Time.now.to_i)
      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
      track_window_outcome(true)
      @last_error = error

      raise_if_necessary
    end

    # Final check over everything recorded so far: raises when the overall
    # share of errors among all documents exceeds the maximum error ratio.
    #
    # @raise [MaxErrorsInWindowExceededError]
    def finalize
      total_documents = @total_error_count + @success_count
      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
      end
    end

    private

    # Checks the thresholds in order (consecutive, total, window ratio) and
    # raises the first one that is exceeded.
    def raise_if_necessary
      errors_in_window = num_errors_in_window
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && errors_in_window.to_f / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{errors_in_window} had errors", :tripped_by => @last_error)
        end

      raise_with_last_cause(error) if error
    end

    # Number of error outcomes currently held in the sliding window.
    def num_errors_in_window
      @window_errors.count(true)
    end

    # Stores the latest outcome in the current window slot and advances the
    # slot index, wrapping around at the window size. Writing successes too is
    # what lets old errors age out of the window. Does nothing when the
    # window is disabled (size 0), which also avoids a modulo by zero.
    def track_window_outcome(is_error)
      return unless @window_size.positive?

      @window_errors[@window_index] = is_error
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ from inside a rescue of the last recorded error, so that
    # Ruby attaches the rescued exception as +error+'s `cause`.
    def raise_with_last_cause(error)
      raise @last_error
    rescue StandardError
      raise error
    end
  end
end
|
data/lib/utility/errors.rb
CHANGED
@@ -60,18 +60,6 @@ module Utility
|
|
60
60
|
class JobDocumentLimitError < StandardError; end
|
61
61
|
class JobClaimingError < StandardError; end
|
62
62
|
|
63
|
-
class MonitoringError < StandardError
|
64
|
-
attr_accessor :tripped_by
|
65
|
-
|
66
|
-
def initialize(message = nil, tripped_by: nil)
|
67
|
-
super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
|
68
|
-
@tripped_by = tripped_by
|
69
|
-
end
|
70
|
-
end
|
71
|
-
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
72
|
-
class MaxErrorsExceededError < MonitoringError; end
|
73
|
-
class MaxErrorsInWindowExceededError < MonitoringError; end
|
74
|
-
|
75
63
|
class JobSyncNotPossibleYetError < StandardError
|
76
64
|
attr_accessor :sync_will_be_possible_at
|
77
65
|
|
data/lib/utility.rb
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
# !!!!!!!!
|
8
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
7
9
|
require 'utility/bulk_queue'
|
8
10
|
require 'utility/common'
|
9
11
|
require 'utility/constants'
|
@@ -11,9 +13,12 @@ require 'utility/cron'
|
|
11
13
|
require 'utility/elasticsearch/index/mappings'
|
12
14
|
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
15
|
require 'utility/environment'
|
16
|
+
require 'utility/error_monitor'
|
14
17
|
require 'utility/errors'
|
15
18
|
require 'utility/filtering'
|
16
19
|
require 'utility/es_client'
|
17
20
|
require 'utility/exception_tracking'
|
18
21
|
require 'utility/extension_mapping_util'
|
19
22
|
require 'utility/logger'
|
23
|
+
# IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
|
24
|
+
# !!!!!!!!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.
|
4
|
+
version: 8.6.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/connectors/crawler/scheduler.rb
|
107
107
|
- lib/connectors/sync_status.rb
|
108
108
|
- lib/connectors_utility.rb
|
109
|
+
- lib/core/connector_job.rb
|
109
110
|
- lib/core/connector_settings.rb
|
110
111
|
- lib/core/elastic_connector_actions.rb
|
111
112
|
- lib/core/filtering/validation_status.rb
|
@@ -119,6 +120,7 @@ files:
|
|
119
120
|
- lib/utility/elasticsearch/index/mappings.rb
|
120
121
|
- lib/utility/elasticsearch/index/text_analysis_settings.rb
|
121
122
|
- lib/utility/environment.rb
|
123
|
+
- lib/utility/error_monitor.rb
|
122
124
|
- lib/utility/errors.rb
|
123
125
|
- lib/utility/es_client.rb
|
124
126
|
- lib/utility/exception_tracking.rb
|
@@ -129,8 +131,8 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
129
131
|
licenses:
|
130
132
|
- Elastic-2.0
|
131
133
|
metadata:
|
132
|
-
revision:
|
133
|
-
repository: git@github.com:elastic/
|
134
|
+
revision: 39cbb85dbae57a2c92e6e0da272d05aa24ca99a9
|
135
|
+
repository: git@github.com:elastic/connectors-ruby.git
|
134
136
|
post_install_message:
|
135
137
|
rdoc_options: []
|
136
138
|
require_paths:
|
@@ -142,9 +144,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
142
144
|
version: '0'
|
143
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
146
|
requirements:
|
145
|
-
- - "
|
147
|
+
- - ">="
|
146
148
|
- !ruby/object:Gem::Version
|
147
|
-
version:
|
149
|
+
version: '0'
|
148
150
|
requirements: []
|
149
151
|
rubygems_version: 3.0.3.1
|
150
152
|
signing_key:
|