connectors_utility 8.6.0.7 → 8.7.0.0.pre.20221117T004939Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors_utility.rb +3 -6
- data/lib/core/connector_settings.rb +19 -21
- data/lib/core/elastic_connector_actions.rb +32 -15
- data/lib/core/scheduler.rb +8 -8
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/error_monitor.rb +5 -26
- data/lib/utility/filtering.rb +0 -4
- metadata +9 -11
- data/lib/connectors/job_trigger_method.rb +0 -14
- data/lib/core/connector_job.rb +0 -252
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
|
4
|
+
data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
|
7
|
+
data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
|
data/lib/connectors_utility.rb
CHANGED
@@ -9,11 +9,8 @@
|
|
9
9
|
require_relative 'utility'
|
10
10
|
|
11
11
|
require_relative 'connectors/connector_status'
|
12
|
-
require_relative 'connectors/crawler/scheduler'
|
13
|
-
require_relative 'connectors/job_trigger_method'
|
14
12
|
require_relative 'connectors/sync_status'
|
15
|
-
require_relative 'core/connector_job'
|
16
|
-
require_relative 'core/connector_settings'
|
17
|
-
require_relative 'core/elastic_connector_actions'
|
18
|
-
require_relative 'core/filtering/validation_status'
|
19
13
|
require_relative 'core/scheduler'
|
14
|
+
require_relative 'core/elastic_connector_actions'
|
15
|
+
|
16
|
+
require_relative 'connectors/crawler/scheduler'
|
data/lib/core/connector_settings.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
-
require 'connectors/sync_status'
|
12
11
|
require 'core/elastic_connector_actions'
|
13
12
|
require 'utility'
|
14
13
|
|
@@ -50,11 +49,6 @@ module Core
|
|
50
49
|
fetch_connectors_by_query(query, page_size)
|
51
50
|
end
|
52
51
|
|
53
|
-
def self.fetch_all_connectors(page_size = DEFAULT_PAGE_SIZE)
|
54
|
-
query = { match_all: {} }
|
55
|
-
fetch_connectors_by_query(query, page_size)
|
56
|
-
end
|
57
|
-
|
58
52
|
def id
|
59
53
|
@elasticsearch_response[:_id]
|
60
54
|
end
|
@@ -88,10 +82,6 @@ module Core
|
|
88
82
|
self[:scheduling]
|
89
83
|
end
|
90
84
|
|
91
|
-
def sync_now?
|
92
|
-
self[:sync_now] == true
|
93
|
-
end
|
94
|
-
|
95
85
|
def filtering
|
96
86
|
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
97
87
|
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
@@ -103,6 +93,18 @@ module Core
|
|
103
93
|
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
104
94
|
end
|
105
95
|
|
96
|
+
def extract_binary_content?
|
97
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
98
|
+
end
|
99
|
+
|
100
|
+
def reduce_whitespace?
|
101
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
102
|
+
end
|
103
|
+
|
104
|
+
def run_ml_inference?
|
105
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
106
|
+
end
|
107
|
+
|
106
108
|
def formatted
|
107
109
|
properties = ["ID: #{id}"]
|
108
110
|
properties << "Service type: #{service_type}" if service_type
|
@@ -128,23 +130,19 @@ module Core
|
|
128
130
|
end
|
129
131
|
|
130
132
|
def update_last_sync!(job)
|
131
|
-
# if job is nil, connector still needs to be updated, to avoid it stuck at in_progress
|
132
|
-
job_status = job&.status || Connectors::SyncStatus::ERROR
|
133
|
-
job_error = job.nil? ? 'Could\'t find the job' : job.error
|
134
|
-
job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
|
135
|
-
connector_status = (job_status == Connectors::SyncStatus::ERROR ? Connectors::ConnectorStatus::ERROR : Connectors::ConnectorStatus::CONNECTED)
|
136
133
|
doc = {
|
137
|
-
:last_sync_status =>
|
134
|
+
:last_sync_status => job.status,
|
138
135
|
:last_synced => Time.now,
|
139
|
-
:last_sync_error =>
|
140
|
-
:
|
141
|
-
:error => job_error
|
136
|
+
:last_sync_error => job.error,
|
137
|
+
:error => job.error
|
142
138
|
}
|
143
|
-
|
139
|
+
|
140
|
+
if job.terminated?
|
144
141
|
doc[:last_indexed_document_count] = job[:indexed_document_count]
|
145
142
|
doc[:last_deleted_document_count] = job[:deleted_document_count]
|
146
143
|
end
|
147
|
-
|
144
|
+
|
145
|
+
Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
|
148
146
|
end
|
149
147
|
|
150
148
|
private
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
#
|
9
9
|
require 'active_support/core_ext/hash'
|
10
10
|
require 'connectors/connector_status'
|
11
|
-
require 'connectors/job_trigger_method'
|
12
11
|
require 'connectors/sync_status'
|
13
12
|
require 'utility'
|
14
13
|
require 'elastic-transport'
|
@@ -92,17 +91,6 @@ module Core
|
|
92
91
|
)
|
93
92
|
end
|
94
93
|
|
95
|
-
def delete_jobs_by_query(query)
|
96
|
-
client.delete_by_query(
|
97
|
-
:index => Utility::Constants::JOB_INDEX,
|
98
|
-
:body => { :query => query }
|
99
|
-
)
|
100
|
-
end
|
101
|
-
|
102
|
-
def delete_indices(indices)
|
103
|
-
client.indices.delete(:index => indices, :ignore_unavailable => true)
|
104
|
-
end
|
105
|
-
|
106
94
|
def update_connector_configuration(connector_id, configuration)
|
107
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
108
96
|
end
|
@@ -190,15 +178,13 @@ module Core
|
|
190
178
|
status: Connectors::SyncStatus::PENDING,
|
191
179
|
created_at: Time.now,
|
192
180
|
last_seen: Time.now,
|
193
|
-
trigger_method: connector_settings.sync_now? ? Connectors::JobTriggerMethod::ON_DEMAND : Connectors::JobTriggerMethod::SCHEDULED,
|
194
181
|
connector: {
|
195
182
|
id: connector_settings.id,
|
196
183
|
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
197
184
|
index_name: connector_settings.index_name,
|
198
185
|
language: connector_settings[:language],
|
199
186
|
pipeline: connector_settings[:pipeline],
|
200
|
-
service_type: connector_settings.service_type
|
201
|
-
configuration: connector_settings.configuration
|
187
|
+
service_type: connector_settings.service_type
|
202
188
|
}
|
203
189
|
}
|
204
190
|
|
@@ -234,6 +220,37 @@ module Core
|
|
234
220
|
update_connector_fields(connector_id, body)
|
235
221
|
end
|
236
222
|
|
223
|
+
def update_sync(job_id, metadata)
|
224
|
+
body = {
|
225
|
+
:doc => { :last_seen => Time.now }.merge(metadata)
|
226
|
+
}
|
227
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
228
|
+
end
|
229
|
+
|
230
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
+
|
233
|
+
metadata ||= {}
|
234
|
+
|
235
|
+
update_connector_fields(connector_id,
|
236
|
+
:last_sync_status => sync_status,
|
237
|
+
:last_sync_error => error,
|
238
|
+
:error => error,
|
239
|
+
:last_synced => Time.now,
|
240
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
241
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
242
|
+
|
243
|
+
body = {
|
244
|
+
:doc => {
|
245
|
+
:status => sync_status,
|
246
|
+
:completed_at => Time.now,
|
247
|
+
:last_seen => Time.now,
|
248
|
+
:error => error
|
249
|
+
}.merge(metadata)
|
250
|
+
}
|
251
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
252
|
+
end
|
253
|
+
|
237
254
|
def fetch_document_ids(index_name)
|
238
255
|
page_size = 1000
|
239
256
|
result = []
|
data/lib/core/scheduler.rb
CHANGED
@@ -78,7 +78,7 @@ module Core
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# Sync when sync_now flag is true for the connector
|
81
|
-
if connector_settings
|
81
|
+
if connector_settings[:sync_now] == true
|
82
82
|
Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
|
83
83
|
return true
|
84
84
|
end
|
@@ -90,6 +90,13 @@ module Core
|
|
90
90
|
return false
|
91
91
|
end
|
92
92
|
|
93
|
+
# We want to sync when sync never actually happened
|
94
|
+
last_synced = connector_settings[:last_synced]
|
95
|
+
if last_synced.nil? || last_synced.empty?
|
96
|
+
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
97
|
+
return true
|
98
|
+
end
|
99
|
+
|
93
100
|
current_schedule = scheduling_settings[:interval]
|
94
101
|
|
95
102
|
# Don't sync if there is no actual scheduling interval
|
@@ -112,13 +119,6 @@ module Core
|
|
112
119
|
return false
|
113
120
|
end
|
114
121
|
|
115
|
-
# We want to sync when sync never actually happened
|
116
|
-
last_synced = connector_settings[:last_synced]
|
117
|
-
if last_synced.nil? || last_synced.empty?
|
118
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
119
|
-
return true
|
120
|
-
end
|
121
|
-
|
122
122
|
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
123
|
|
124
124
|
# Sync if next trigger for the connector is in past
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -13,7 +13,7 @@ module Utility
|
|
13
13
|
class QueueOverflowError < StandardError; end
|
14
14
|
|
15
15
|
# 500 items or 5MB
|
16
|
-
def initialize(operation_count_threshold = Utility::Constants::
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
17
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
18
18
|
@size_threshold = size_threshold.freeze
|
19
19
|
|
data/lib/utility/error_monitor.rb
CHANGED
@@ -51,7 +51,7 @@ module Utility
|
|
51
51
|
def note_success
|
52
52
|
@consecutive_error_count = 0
|
53
53
|
@success_count += 1
|
54
|
-
|
54
|
+
increment_window_index
|
55
55
|
end
|
56
56
|
|
57
57
|
def note_error(error, id: Time.now.to_i)
|
@@ -60,9 +60,10 @@ module Utility
|
|
60
60
|
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
61
|
@total_error_count += 1
|
62
62
|
@consecutive_error_count += 1
|
63
|
+
@window_errors[@window_index] = true
|
63
64
|
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
64
65
|
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
65
|
-
|
66
|
+
increment_window_index
|
66
67
|
@last_error = error
|
67
68
|
|
68
69
|
raise_if_necessary
|
@@ -91,32 +92,10 @@ module Utility
|
|
91
92
|
end
|
92
93
|
|
93
94
|
def num_errors_in_window
|
94
|
-
@window_errors.count(
|
95
|
+
@window_errors.count(&:itself).to_f
|
95
96
|
end
|
96
97
|
|
97
|
-
def
|
98
|
-
# We keep the errors array of the size @window_size this way, imagine @window_size = 5
|
99
|
-
# Error array inits as falses:
|
100
|
-
# [ false, false, false, false, false ]
|
101
|
-
# Third document raises an error:
|
102
|
-
# [ false, false, true, false, false ]
|
103
|
-
# ^^^^
|
104
|
-
# 2 % 5 == 2
|
105
|
-
# Fifth document raises an error:
|
106
|
-
# [ false, false, true, false, true ]
|
107
|
-
# ^^^^
|
108
|
-
# 4 % 5 == 4
|
109
|
-
# Sixth document raises an error:
|
110
|
-
# [ true, false, true, false, true ]
|
111
|
-
# ^^^^
|
112
|
-
# 5 % 5 == 0
|
113
|
-
#
|
114
|
-
# Eigth document is successful:
|
115
|
-
# [ true, false, false, false, true ]
|
116
|
-
# ^^^^^
|
117
|
-
# 7 % 5 == 2
|
118
|
-
# And so on.
|
119
|
-
@window_errors[@window_index] = is_error
|
98
|
+
def increment_window_index
|
120
99
|
@window_index = (@window_index + 1) % @window_size
|
121
100
|
end
|
122
101
|
|
data/lib/utility/filtering.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.
|
4
|
+
version: 8.7.0.0.pre.20221117T004939Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -104,10 +104,8 @@ files:
|
|
104
104
|
- NOTICE.txt
|
105
105
|
- lib/connectors/connector_status.rb
|
106
106
|
- lib/connectors/crawler/scheduler.rb
|
107
|
-
- lib/connectors/job_trigger_method.rb
|
108
107
|
- lib/connectors/sync_status.rb
|
109
108
|
- lib/connectors_utility.rb
|
110
|
-
- lib/core/connector_job.rb
|
111
109
|
- lib/core/connector_settings.rb
|
112
110
|
- lib/core/elastic_connector_actions.rb
|
113
111
|
- lib/core/filtering/validation_status.rb
|
@@ -132,9 +130,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
132
130
|
licenses:
|
133
131
|
- Elastic-2.0
|
134
132
|
metadata:
|
135
|
-
revision:
|
136
|
-
repository: https://github.com/elastic/connectors-ruby
|
137
|
-
post_install_message:
|
133
|
+
revision: 294214a26b0fe9a4347763b01de681c336e8daae
|
134
|
+
repository: https://github.com/elastic/connectors-ruby.git
|
135
|
+
post_install_message:
|
138
136
|
rdoc_options: []
|
139
137
|
require_paths:
|
140
138
|
- lib
|
@@ -145,12 +143,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
145
143
|
version: '0'
|
146
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
145
|
requirements:
|
148
|
-
- - "
|
146
|
+
- - ">"
|
149
147
|
- !ruby/object:Gem::Version
|
150
|
-
version:
|
148
|
+
version: 1.3.1
|
151
149
|
requirements: []
|
152
150
|
rubygems_version: 3.0.3.1
|
153
|
-
signing_key:
|
151
|
+
signing_key:
|
154
152
|
specification_version: 4
|
155
153
|
summary: Gem containing shared Connector Services libraries
|
156
154
|
test_files: []
|
data/lib/connectors/job_trigger_method.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Connectors
|
10
|
-
class JobTriggerMethod
|
11
|
-
ON_DEMAND = 'on_demand'
|
12
|
-
SCHEDULED = 'scheduled'
|
13
|
-
end
|
14
|
-
end
|
data/lib/core/connector_job.rb
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
-
require 'connectors/sync_status'
|
11
|
-
require 'core/connector_settings'
|
12
|
-
require 'core/elastic_connector_actions'
|
13
|
-
require 'utility'
|
14
|
-
|
15
|
-
module Core
|
16
|
-
class ConnectorJob
|
17
|
-
DEFAULT_PAGE_SIZE = 100
|
18
|
-
STUCK_THRESHOLD = 60
|
19
|
-
|
20
|
-
def self.fetch_by_id(job_id)
|
21
|
-
es_response = ElasticConnectorActions.get_job(job_id)
|
22
|
-
return nil unless es_response[:found]
|
23
|
-
|
24
|
-
new(es_response)
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.pending_jobs(connectors_ids: [], page_size: DEFAULT_PAGE_SIZE)
|
28
|
-
status_term = { status: Connectors::SyncStatus::PENDING_STATUSES }
|
29
|
-
|
30
|
-
query = { bool: { must: [{ terms: status_term }] } }
|
31
|
-
|
32
|
-
return fetch_jobs_by_query(query, page_size) if connectors_ids.empty?
|
33
|
-
|
34
|
-
query[:bool][:must] << { terms: { 'connector.id' => connectors_ids } }
|
35
|
-
|
36
|
-
fetch_jobs_by_query(query, page_size)
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
|
40
|
-
connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
|
41
|
-
query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
|
42
|
-
fetch_jobs_by_query(query, page_size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.delete_jobs(jobs)
|
46
|
-
query = { terms: { '_id': jobs.map(&:id) } }
|
47
|
-
ElasticConnectorActions.delete_jobs_by_query(query)
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.stuck_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
|
51
|
-
connector_ids = if connector_id
|
52
|
-
[connector_id]
|
53
|
-
else
|
54
|
-
ConnectorSettings.fetch_native_connectors.map(&:id)
|
55
|
-
end
|
56
|
-
query = {
|
57
|
-
bool: {
|
58
|
-
filter: [
|
59
|
-
{ terms: { 'connector.id': connector_ids } },
|
60
|
-
{ terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
|
61
|
-
{ range: { last_seen: { lte: "now-#{STUCK_THRESHOLD}s" } } }
|
62
|
-
]
|
63
|
-
}
|
64
|
-
}
|
65
|
-
fetch_jobs_by_query(query, page_size)
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.enqueue(_connector_id)
|
69
|
-
nil
|
70
|
-
end
|
71
|
-
|
72
|
-
def id
|
73
|
-
@elasticsearch_response[:_id]
|
74
|
-
end
|
75
|
-
|
76
|
-
def [](property_name)
|
77
|
-
@elasticsearch_response[:_source][property_name]
|
78
|
-
end
|
79
|
-
|
80
|
-
def error
|
81
|
-
self[:error]
|
82
|
-
end
|
83
|
-
|
84
|
-
def status
|
85
|
-
self[:status]
|
86
|
-
end
|
87
|
-
|
88
|
-
def in_progress?
|
89
|
-
status == Connectors::SyncStatus::IN_PROGRESS
|
90
|
-
end
|
91
|
-
|
92
|
-
def canceling?
|
93
|
-
status == Connectors::SyncStatus::CANCELING
|
94
|
-
end
|
95
|
-
|
96
|
-
def suspended?
|
97
|
-
status == Connectors::SyncStatus::SUSPENDED
|
98
|
-
end
|
99
|
-
|
100
|
-
def canceled?
|
101
|
-
status == Connectors::SyncStatus::CANCELED
|
102
|
-
end
|
103
|
-
|
104
|
-
def pending?
|
105
|
-
Connectors::SyncStatus::PENDING_STATUSES.include?(status)
|
106
|
-
end
|
107
|
-
|
108
|
-
def active?
|
109
|
-
Connectors::SyncStatus::ACTIVE_STATUSES.include?(status)
|
110
|
-
end
|
111
|
-
|
112
|
-
def terminated?
|
113
|
-
Connectors::SyncStatus::TERMINAL_STATUSES.include?(status)
|
114
|
-
end
|
115
|
-
|
116
|
-
def connector_snapshot
|
117
|
-
self[:connector] || {}
|
118
|
-
end
|
119
|
-
|
120
|
-
def connector_id
|
121
|
-
connector_snapshot[:id]
|
122
|
-
end
|
123
|
-
|
124
|
-
def index_name
|
125
|
-
connector_snapshot[:index_name]
|
126
|
-
end
|
127
|
-
|
128
|
-
def language
|
129
|
-
connector_snapshot[:language]
|
130
|
-
end
|
131
|
-
|
132
|
-
def service_type
|
133
|
-
connector_snapshot[:service_type]
|
134
|
-
end
|
135
|
-
|
136
|
-
def configuration
|
137
|
-
connector_snapshot[:configuration]
|
138
|
-
end
|
139
|
-
|
140
|
-
def filtering
|
141
|
-
connector_snapshot[:filtering]
|
142
|
-
end
|
143
|
-
|
144
|
-
def pipeline
|
145
|
-
connector_snapshot[:pipeline] || {}
|
146
|
-
end
|
147
|
-
|
148
|
-
def extract_binary_content?
|
149
|
-
pipeline[:extract_binary_content]
|
150
|
-
end
|
151
|
-
|
152
|
-
def reduce_whitespace?
|
153
|
-
pipeline[:reduce_whitespace]
|
154
|
-
end
|
155
|
-
|
156
|
-
def run_ml_inference?
|
157
|
-
pipeline[:run_ml_inference]
|
158
|
-
end
|
159
|
-
|
160
|
-
def connector
|
161
|
-
@connector ||= ConnectorSettings.fetch_by_id(connector_id)
|
162
|
-
end
|
163
|
-
|
164
|
-
def update_metadata(ingestion_stats = {}, connector_metadata = {})
|
165
|
-
ingestion_stats ||= {}
|
166
|
-
doc = { :last_seen => Time.now }.merge(ingestion_stats)
|
167
|
-
doc[:metadata] = connector_metadata if connector_metadata&.any?
|
168
|
-
ElasticConnectorActions.update_job_fields(id, doc)
|
169
|
-
end
|
170
|
-
|
171
|
-
def done!(ingestion_stats = {}, connector_metadata = {})
|
172
|
-
terminate!(Connectors::SyncStatus::COMPLETED, nil, ingestion_stats, connector_metadata)
|
173
|
-
end
|
174
|
-
|
175
|
-
def error!(message, ingestion_stats = {}, connector_metadata = {})
|
176
|
-
terminate!(Connectors::SyncStatus::ERROR, message, ingestion_stats, connector_metadata)
|
177
|
-
end
|
178
|
-
|
179
|
-
def cancel!(ingestion_stats = {}, connector_metadata = {})
|
180
|
-
terminate!(Connectors::SyncStatus::CANCELED, nil, ingestion_stats, connector_metadata)
|
181
|
-
end
|
182
|
-
|
183
|
-
def with_concurrency_control
|
184
|
-
response = ElasticConnectorActions.get_job(id)
|
185
|
-
|
186
|
-
yield response, response['_seq_no'], response['_primary_term']
|
187
|
-
end
|
188
|
-
|
189
|
-
def make_running!
|
190
|
-
with_concurrency_control do |es_doc, seq_no, primary_term|
|
191
|
-
now = Time.now
|
192
|
-
doc = {
|
193
|
-
status: Connectors::SyncStatus::IN_PROGRESS,
|
194
|
-
started_at: now,
|
195
|
-
last_seen: now,
|
196
|
-
worker_hostname: Socket.gethostname
|
197
|
-
}
|
198
|
-
|
199
|
-
ElasticConnectorActions.update_job_fields(es_doc[:_id], doc, seq_no, primary_term)
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
def es_source
|
204
|
-
@elasticsearch_response[:_source]
|
205
|
-
end
|
206
|
-
|
207
|
-
private
|
208
|
-
|
209
|
-
def self.fetch_jobs_by_query(query, page_size)
|
210
|
-
results = []
|
211
|
-
offset = 0
|
212
|
-
loop do
|
213
|
-
response = ElasticConnectorActions.search_jobs(query, page_size, offset)
|
214
|
-
|
215
|
-
hits = response.dig('hits', 'hits') || []
|
216
|
-
total = response.dig('hits', 'total', 'value') || 0
|
217
|
-
results += hits.map { |hit| new(hit) }
|
218
|
-
break if results.size >= total
|
219
|
-
offset += hits.size
|
220
|
-
end
|
221
|
-
|
222
|
-
results
|
223
|
-
end
|
224
|
-
|
225
|
-
def initialize(es_response)
|
226
|
-
# TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
|
227
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
228
|
-
end
|
229
|
-
|
230
|
-
def terminate!(status, error = nil, ingestion_stats = {}, connector_metadata = {})
|
231
|
-
ingestion_stats ||= {}
|
232
|
-
ingestion_stats[:total_document_count] = ElasticConnectorActions.document_count(index_name)
|
233
|
-
doc = {
|
234
|
-
:last_seen => Time.now,
|
235
|
-
:completed_at => Time.now,
|
236
|
-
:status => status,
|
237
|
-
:error => error
|
238
|
-
}.merge(ingestion_stats)
|
239
|
-
doc[:canceled_at] = Time.now if status == Connectors::SyncStatus::CANCELED
|
240
|
-
doc[:metadata] = connector_metadata if connector_metadata&.any?
|
241
|
-
ElasticConnectorActions.update_job_fields(id, doc)
|
242
|
-
end
|
243
|
-
|
244
|
-
def seq_no
|
245
|
-
@elasticsearch_response[:_seq_no]
|
246
|
-
end
|
247
|
-
|
248
|
-
def primary_term
|
249
|
-
@elasticsearch_response[:_primary_term]
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|