connectors_utility 8.6.0.7 → 8.7.0.0.pre.20221117T004939Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors_utility.rb +3 -6
- data/lib/core/connector_settings.rb +19 -21
- data/lib/core/elastic_connector_actions.rb +32 -15
- data/lib/core/scheduler.rb +8 -8
- data/lib/utility/bulk_queue.rb +1 -1
- data/lib/utility/error_monitor.rb +5 -26
- data/lib/utility/filtering.rb +0 -4
- metadata +9 -11
- data/lib/connectors/job_trigger_method.rb +0 -14
- data/lib/core/connector_job.rb +0 -252
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d2972f8e6974a79b6088ce6c03453c327132ce19ffb09dbf30f349eae4c2108
|
4
|
+
data.tar.gz: 4fd458de07be07923e0675dc0f341b8211ba3daeec8ac27bfb4f9eb9aff2334a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9db02a3003d5645cbb5d57d4ca1bdb1acb65234f5a931afd9ccb06e2fbbe25be2394c65a72a9ae36038c8c127c35a0d937c83558ede0e4960fc688b073db052a
|
7
|
+
data.tar.gz: d02681e0d4009420b949ec649c9eac52bd533eee493c181ea1ffb15d939b561e81f234bac4885eefd0ea82c8564d7d330c0635c535b0616cfb9280e2e38512df
|
data/lib/connectors_utility.rb
CHANGED
@@ -9,11 +9,8 @@
|
|
9
9
|
require_relative 'utility'
|
10
10
|
|
11
11
|
require_relative 'connectors/connector_status'
|
12
|
-
require_relative 'connectors/crawler/scheduler'
|
13
|
-
require_relative 'connectors/job_trigger_method'
|
14
12
|
require_relative 'connectors/sync_status'
|
15
|
-
require_relative 'core/connector_job'
|
16
|
-
require_relative 'core/connector_settings'
|
17
|
-
require_relative 'core/elastic_connector_actions'
|
18
|
-
require_relative 'core/filtering/validation_status'
|
19
13
|
require_relative 'core/scheduler'
|
14
|
+
require_relative 'core/elastic_connector_actions'
|
15
|
+
|
16
|
+
require_relative 'connectors/crawler/scheduler'
|
data/lib/core/connector_settings.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
-
require 'connectors/sync_status'
|
12
11
|
require 'core/elastic_connector_actions'
|
13
12
|
require 'utility'
|
14
13
|
|
@@ -50,11 +49,6 @@ module Core
|
|
50
49
|
fetch_connectors_by_query(query, page_size)
|
51
50
|
end
|
52
51
|
|
53
|
-
def self.fetch_all_connectors(page_size = DEFAULT_PAGE_SIZE)
|
54
|
-
query = { match_all: {} }
|
55
|
-
fetch_connectors_by_query(query, page_size)
|
56
|
-
end
|
57
|
-
|
58
52
|
def id
|
59
53
|
@elasticsearch_response[:_id]
|
60
54
|
end
|
@@ -88,10 +82,6 @@ module Core
|
|
88
82
|
self[:scheduling]
|
89
83
|
end
|
90
84
|
|
91
|
-
def sync_now?
|
92
|
-
self[:sync_now] == true
|
93
|
-
end
|
94
|
-
|
95
85
|
def filtering
|
96
86
|
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
97
87
|
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
@@ -103,6 +93,18 @@ module Core
|
|
103
93
|
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
104
94
|
end
|
105
95
|
|
96
|
+
def extract_binary_content?
|
97
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
98
|
+
end
|
99
|
+
|
100
|
+
def reduce_whitespace?
|
101
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
102
|
+
end
|
103
|
+
|
104
|
+
def run_ml_inference?
|
105
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
106
|
+
end
|
107
|
+
|
106
108
|
def formatted
|
107
109
|
properties = ["ID: #{id}"]
|
108
110
|
properties << "Service type: #{service_type}" if service_type
|
@@ -128,23 +130,19 @@ module Core
|
|
128
130
|
end
|
129
131
|
|
130
132
|
def update_last_sync!(job)
|
131
|
-
# if job is nil, connector still needs to be updated, to avoid it stuck at in_progress
|
132
|
-
job_status = job&.status || Connectors::SyncStatus::ERROR
|
133
|
-
job_error = job.nil? ? 'Could\'t find the job' : job.error
|
134
|
-
job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
|
135
|
-
connector_status = (job_status == Connectors::SyncStatus::ERROR ? Connectors::ConnectorStatus::ERROR : Connectors::ConnectorStatus::CONNECTED)
|
136
133
|
doc = {
|
137
|
-
:last_sync_status =>
|
134
|
+
:last_sync_status => job.status,
|
138
135
|
:last_synced => Time.now,
|
139
|
-
:last_sync_error =>
|
140
|
-
:
|
141
|
-
:error => job_error
|
136
|
+
:last_sync_error => job.error,
|
137
|
+
:error => job.error
|
142
138
|
}
|
143
|
-
|
139
|
+
|
140
|
+
if job.terminated?
|
144
141
|
doc[:last_indexed_document_count] = job[:indexed_document_count]
|
145
142
|
doc[:last_deleted_document_count] = job[:deleted_document_count]
|
146
143
|
end
|
147
|
-
|
144
|
+
|
145
|
+
Core::ElasticConnectorActions.update_connector_fields(job.connector_id, doc)
|
148
146
|
end
|
149
147
|
|
150
148
|
private
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
#
|
9
9
|
require 'active_support/core_ext/hash'
|
10
10
|
require 'connectors/connector_status'
|
11
|
-
require 'connectors/job_trigger_method'
|
12
11
|
require 'connectors/sync_status'
|
13
12
|
require 'utility'
|
14
13
|
require 'elastic-transport'
|
@@ -92,17 +91,6 @@ module Core
|
|
92
91
|
)
|
93
92
|
end
|
94
93
|
|
95
|
-
def delete_jobs_by_query(query)
|
96
|
-
client.delete_by_query(
|
97
|
-
:index => Utility::Constants::JOB_INDEX,
|
98
|
-
:body => { :query => query }
|
99
|
-
)
|
100
|
-
end
|
101
|
-
|
102
|
-
def delete_indices(indices)
|
103
|
-
client.indices.delete(:index => indices, :ignore_unavailable => true)
|
104
|
-
end
|
105
|
-
|
106
94
|
def update_connector_configuration(connector_id, configuration)
|
107
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
108
96
|
end
|
@@ -190,15 +178,13 @@ module Core
|
|
190
178
|
status: Connectors::SyncStatus::PENDING,
|
191
179
|
created_at: Time.now,
|
192
180
|
last_seen: Time.now,
|
193
|
-
trigger_method: connector_settings.sync_now? ? Connectors::JobTriggerMethod::ON_DEMAND : Connectors::JobTriggerMethod::SCHEDULED,
|
194
181
|
connector: {
|
195
182
|
id: connector_settings.id,
|
196
183
|
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
197
184
|
index_name: connector_settings.index_name,
|
198
185
|
language: connector_settings[:language],
|
199
186
|
pipeline: connector_settings[:pipeline],
|
200
|
-
service_type: connector_settings.service_type
|
201
|
-
configuration: connector_settings.configuration
|
187
|
+
service_type: connector_settings.service_type
|
202
188
|
}
|
203
189
|
}
|
204
190
|
|
@@ -234,6 +220,37 @@ module Core
|
|
234
220
|
update_connector_fields(connector_id, body)
|
235
221
|
end
|
236
222
|
|
223
|
+
def update_sync(job_id, metadata)
|
224
|
+
body = {
|
225
|
+
:doc => { :last_seen => Time.now }.merge(metadata)
|
226
|
+
}
|
227
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
228
|
+
end
|
229
|
+
|
230
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
231
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
232
|
+
|
233
|
+
metadata ||= {}
|
234
|
+
|
235
|
+
update_connector_fields(connector_id,
|
236
|
+
:last_sync_status => sync_status,
|
237
|
+
:last_sync_error => error,
|
238
|
+
:error => error,
|
239
|
+
:last_synced => Time.now,
|
240
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
241
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
242
|
+
|
243
|
+
body = {
|
244
|
+
:doc => {
|
245
|
+
:status => sync_status,
|
246
|
+
:completed_at => Time.now,
|
247
|
+
:last_seen => Time.now,
|
248
|
+
:error => error
|
249
|
+
}.merge(metadata)
|
250
|
+
}
|
251
|
+
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
252
|
+
end
|
253
|
+
|
237
254
|
def fetch_document_ids(index_name)
|
238
255
|
page_size = 1000
|
239
256
|
result = []
|
data/lib/core/scheduler.rb
CHANGED
@@ -78,7 +78,7 @@ module Core
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# Sync when sync_now flag is true for the connector
|
81
|
-
if connector_settings
|
81
|
+
if connector_settings[:sync_now] == true
|
82
82
|
Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
|
83
83
|
return true
|
84
84
|
end
|
@@ -90,6 +90,13 @@ module Core
|
|
90
90
|
return false
|
91
91
|
end
|
92
92
|
|
93
|
+
# We want to sync when sync never actually happened
|
94
|
+
last_synced = connector_settings[:last_synced]
|
95
|
+
if last_synced.nil? || last_synced.empty?
|
96
|
+
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
97
|
+
return true
|
98
|
+
end
|
99
|
+
|
93
100
|
current_schedule = scheduling_settings[:interval]
|
94
101
|
|
95
102
|
# Don't sync if there is no actual scheduling interval
|
@@ -112,13 +119,6 @@ module Core
|
|
112
119
|
return false
|
113
120
|
end
|
114
121
|
|
115
|
-
# We want to sync when sync never actually happened
|
116
|
-
last_synced = connector_settings[:last_synced]
|
117
|
-
if last_synced.nil? || last_synced.empty?
|
118
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
119
|
-
return true
|
120
|
-
end
|
121
|
-
|
122
122
|
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
123
|
|
124
124
|
# Sync if next trigger for the connector is in past
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -13,7 +13,7 @@ module Utility
|
|
13
13
|
class QueueOverflowError < StandardError; end
|
14
14
|
|
15
15
|
# 500 items or 5MB
|
16
|
-
def initialize(operation_count_threshold = Utility::Constants::
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
17
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
18
18
|
@size_threshold = size_threshold.freeze
|
19
19
|
|
data/lib/utility/error_monitor.rb
CHANGED
@@ -51,7 +51,7 @@ module Utility
|
|
51
51
|
def note_success
|
52
52
|
@consecutive_error_count = 0
|
53
53
|
@success_count += 1
|
54
|
-
|
54
|
+
increment_window_index
|
55
55
|
end
|
56
56
|
|
57
57
|
def note_error(error, id: Time.now.to_i)
|
@@ -60,9 +60,10 @@ module Utility
|
|
60
60
|
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
61
|
@total_error_count += 1
|
62
62
|
@consecutive_error_count += 1
|
63
|
+
@window_errors[@window_index] = true
|
63
64
|
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
64
65
|
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
65
|
-
|
66
|
+
increment_window_index
|
66
67
|
@last_error = error
|
67
68
|
|
68
69
|
raise_if_necessary
|
@@ -91,32 +92,10 @@ module Utility
|
|
91
92
|
end
|
92
93
|
|
93
94
|
def num_errors_in_window
|
94
|
-
@window_errors.count(
|
95
|
+
@window_errors.count(&:itself).to_f
|
95
96
|
end
|
96
97
|
|
97
|
-
def
|
98
|
-
# We keep the errors array of the size @window_size this way, imagine @window_size = 5
|
99
|
-
# Error array inits as falses:
|
100
|
-
# [ false, false, false, false, false ]
|
101
|
-
# Third document raises an error:
|
102
|
-
# [ false, false, true, false, false ]
|
103
|
-
# ^^^^
|
104
|
-
# 2 % 5 == 2
|
105
|
-
# Fifth document raises an error:
|
106
|
-
# [ false, false, true, false, true ]
|
107
|
-
# ^^^^
|
108
|
-
# 4 % 5 == 4
|
109
|
-
# Sixth document raises an error:
|
110
|
-
# [ true, false, true, false, true ]
|
111
|
-
# ^^^^
|
112
|
-
# 5 % 5 == 0
|
113
|
-
#
|
114
|
-
# Eigth document is successful:
|
115
|
-
# [ true, false, false, false, true ]
|
116
|
-
# ^^^^^
|
117
|
-
# 7 % 5 == 2
|
118
|
-
# And so on.
|
119
|
-
@window_errors[@window_index] = is_error
|
98
|
+
def increment_window_index
|
120
99
|
@window_index = (@window_index + 1) % @window_size
|
121
100
|
end
|
122
101
|
|
data/lib/utility/filtering.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.
|
4
|
+
version: 8.7.0.0.pre.20221117T004939Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -104,10 +104,8 @@ files:
|
|
104
104
|
- NOTICE.txt
|
105
105
|
- lib/connectors/connector_status.rb
|
106
106
|
- lib/connectors/crawler/scheduler.rb
|
107
|
-
- lib/connectors/job_trigger_method.rb
|
108
107
|
- lib/connectors/sync_status.rb
|
109
108
|
- lib/connectors_utility.rb
|
110
|
-
- lib/core/connector_job.rb
|
111
109
|
- lib/core/connector_settings.rb
|
112
110
|
- lib/core/elastic_connector_actions.rb
|
113
111
|
- lib/core/filtering/validation_status.rb
|
@@ -132,9 +130,9 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
132
130
|
licenses:
|
133
131
|
- Elastic-2.0
|
134
132
|
metadata:
|
135
|
-
revision:
|
136
|
-
repository: https://github.com/elastic/connectors-ruby
|
137
|
-
post_install_message:
|
133
|
+
revision: 294214a26b0fe9a4347763b01de681c336e8daae
|
134
|
+
repository: https://github.com/elastic/connectors-ruby.git
|
135
|
+
post_install_message:
|
138
136
|
rdoc_options: []
|
139
137
|
require_paths:
|
140
138
|
- lib
|
@@ -145,12 +143,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
145
143
|
version: '0'
|
146
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
145
|
requirements:
|
148
|
-
- - "
|
146
|
+
- - ">"
|
149
147
|
- !ruby/object:Gem::Version
|
150
|
-
version:
|
148
|
+
version: 1.3.1
|
151
149
|
requirements: []
|
152
150
|
rubygems_version: 3.0.3.1
|
153
|
-
signing_key:
|
151
|
+
signing_key:
|
154
152
|
specification_version: 4
|
155
153
|
summary: Gem containing shared Connector Services libraries
|
156
154
|
test_files: []
|
data/lib/connectors/job_trigger_method.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Connectors
|
10
|
-
class JobTriggerMethod
|
11
|
-
ON_DEMAND = 'on_demand'
|
12
|
-
SCHEDULED = 'scheduled'
|
13
|
-
end
|
14
|
-
end
|
data/lib/core/connector_job.rb
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/hash/indifferent_access'
|
10
|
-
require 'connectors/sync_status'
|
11
|
-
require 'core/connector_settings'
|
12
|
-
require 'core/elastic_connector_actions'
|
13
|
-
require 'utility'
|
14
|
-
|
15
|
-
module Core
|
16
|
-
class ConnectorJob
|
17
|
-
DEFAULT_PAGE_SIZE = 100
|
18
|
-
STUCK_THRESHOLD = 60
|
19
|
-
|
20
|
-
def self.fetch_by_id(job_id)
|
21
|
-
es_response = ElasticConnectorActions.get_job(job_id)
|
22
|
-
return nil unless es_response[:found]
|
23
|
-
|
24
|
-
new(es_response)
|
25
|
-
end
|
26
|
-
|
27
|
-
def self.pending_jobs(connectors_ids: [], page_size: DEFAULT_PAGE_SIZE)
|
28
|
-
status_term = { status: Connectors::SyncStatus::PENDING_STATUSES }
|
29
|
-
|
30
|
-
query = { bool: { must: [{ terms: status_term }] } }
|
31
|
-
|
32
|
-
return fetch_jobs_by_query(query, page_size) if connectors_ids.empty?
|
33
|
-
|
34
|
-
query[:bool][:must] << { terms: { 'connector.id' => connectors_ids } }
|
35
|
-
|
36
|
-
fetch_jobs_by_query(query, page_size)
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
|
40
|
-
connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
|
41
|
-
query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
|
42
|
-
fetch_jobs_by_query(query, page_size)
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.delete_jobs(jobs)
|
46
|
-
query = { terms: { '_id': jobs.map(&:id) } }
|
47
|
-
ElasticConnectorActions.delete_jobs_by_query(query)
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.stuck_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
|
51
|
-
connector_ids = if connector_id
|
52
|
-
[connector_id]
|
53
|
-
else
|
54
|
-
ConnectorSettings.fetch_native_connectors.map(&:id)
|
55
|
-
end
|
56
|
-
query = {
|
57
|
-
bool: {
|
58
|
-
filter: [
|
59
|
-
{ terms: { 'connector.id': connector_ids } },
|
60
|
-
{ terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
|
61
|
-
{ range: { last_seen: { lte: "now-#{STUCK_THRESHOLD}s" } } }
|
62
|
-
]
|
63
|
-
}
|
64
|
-
}
|
65
|
-
fetch_jobs_by_query(query, page_size)
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.enqueue(_connector_id)
|
69
|
-
nil
|
70
|
-
end
|
71
|
-
|
72
|
-
def id
|
73
|
-
@elasticsearch_response[:_id]
|
74
|
-
end
|
75
|
-
|
76
|
-
def [](property_name)
|
77
|
-
@elasticsearch_response[:_source][property_name]
|
78
|
-
end
|
79
|
-
|
80
|
-
def error
|
81
|
-
self[:error]
|
82
|
-
end
|
83
|
-
|
84
|
-
def status
|
85
|
-
self[:status]
|
86
|
-
end
|
87
|
-
|
88
|
-
def in_progress?
|
89
|
-
status == Connectors::SyncStatus::IN_PROGRESS
|
90
|
-
end
|
91
|
-
|
92
|
-
def canceling?
|
93
|
-
status == Connectors::SyncStatus::CANCELING
|
94
|
-
end
|
95
|
-
|
96
|
-
def suspended?
|
97
|
-
status == Connectors::SyncStatus::SUSPENDED
|
98
|
-
end
|
99
|
-
|
100
|
-
def canceled?
|
101
|
-
status == Connectors::SyncStatus::CANCELED
|
102
|
-
end
|
103
|
-
|
104
|
-
def pending?
|
105
|
-
Connectors::SyncStatus::PENDING_STATUSES.include?(status)
|
106
|
-
end
|
107
|
-
|
108
|
-
def active?
|
109
|
-
Connectors::SyncStatus::ACTIVE_STATUSES.include?(status)
|
110
|
-
end
|
111
|
-
|
112
|
-
def terminated?
|
113
|
-
Connectors::SyncStatus::TERMINAL_STATUSES.include?(status)
|
114
|
-
end
|
115
|
-
|
116
|
-
def connector_snapshot
|
117
|
-
self[:connector] || {}
|
118
|
-
end
|
119
|
-
|
120
|
-
def connector_id
|
121
|
-
connector_snapshot[:id]
|
122
|
-
end
|
123
|
-
|
124
|
-
def index_name
|
125
|
-
connector_snapshot[:index_name]
|
126
|
-
end
|
127
|
-
|
128
|
-
def language
|
129
|
-
connector_snapshot[:language]
|
130
|
-
end
|
131
|
-
|
132
|
-
def service_type
|
133
|
-
connector_snapshot[:service_type]
|
134
|
-
end
|
135
|
-
|
136
|
-
def configuration
|
137
|
-
connector_snapshot[:configuration]
|
138
|
-
end
|
139
|
-
|
140
|
-
def filtering
|
141
|
-
connector_snapshot[:filtering]
|
142
|
-
end
|
143
|
-
|
144
|
-
def pipeline
|
145
|
-
connector_snapshot[:pipeline] || {}
|
146
|
-
end
|
147
|
-
|
148
|
-
def extract_binary_content?
|
149
|
-
pipeline[:extract_binary_content]
|
150
|
-
end
|
151
|
-
|
152
|
-
def reduce_whitespace?
|
153
|
-
pipeline[:reduce_whitespace]
|
154
|
-
end
|
155
|
-
|
156
|
-
def run_ml_inference?
|
157
|
-
pipeline[:run_ml_inference]
|
158
|
-
end
|
159
|
-
|
160
|
-
def connector
|
161
|
-
@connector ||= ConnectorSettings.fetch_by_id(connector_id)
|
162
|
-
end
|
163
|
-
|
164
|
-
def update_metadata(ingestion_stats = {}, connector_metadata = {})
|
165
|
-
ingestion_stats ||= {}
|
166
|
-
doc = { :last_seen => Time.now }.merge(ingestion_stats)
|
167
|
-
doc[:metadata] = connector_metadata if connector_metadata&.any?
|
168
|
-
ElasticConnectorActions.update_job_fields(id, doc)
|
169
|
-
end
|
170
|
-
|
171
|
-
def done!(ingestion_stats = {}, connector_metadata = {})
|
172
|
-
terminate!(Connectors::SyncStatus::COMPLETED, nil, ingestion_stats, connector_metadata)
|
173
|
-
end
|
174
|
-
|
175
|
-
def error!(message, ingestion_stats = {}, connector_metadata = {})
|
176
|
-
terminate!(Connectors::SyncStatus::ERROR, message, ingestion_stats, connector_metadata)
|
177
|
-
end
|
178
|
-
|
179
|
-
def cancel!(ingestion_stats = {}, connector_metadata = {})
|
180
|
-
terminate!(Connectors::SyncStatus::CANCELED, nil, ingestion_stats, connector_metadata)
|
181
|
-
end
|
182
|
-
|
183
|
-
def with_concurrency_control
|
184
|
-
response = ElasticConnectorActions.get_job(id)
|
185
|
-
|
186
|
-
yield response, response['_seq_no'], response['_primary_term']
|
187
|
-
end
|
188
|
-
|
189
|
-
def make_running!
|
190
|
-
with_concurrency_control do |es_doc, seq_no, primary_term|
|
191
|
-
now = Time.now
|
192
|
-
doc = {
|
193
|
-
status: Connectors::SyncStatus::IN_PROGRESS,
|
194
|
-
started_at: now,
|
195
|
-
last_seen: now,
|
196
|
-
worker_hostname: Socket.gethostname
|
197
|
-
}
|
198
|
-
|
199
|
-
ElasticConnectorActions.update_job_fields(es_doc[:_id], doc, seq_no, primary_term)
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
def es_source
|
204
|
-
@elasticsearch_response[:_source]
|
205
|
-
end
|
206
|
-
|
207
|
-
private
|
208
|
-
|
209
|
-
def self.fetch_jobs_by_query(query, page_size)
|
210
|
-
results = []
|
211
|
-
offset = 0
|
212
|
-
loop do
|
213
|
-
response = ElasticConnectorActions.search_jobs(query, page_size, offset)
|
214
|
-
|
215
|
-
hits = response.dig('hits', 'hits') || []
|
216
|
-
total = response.dig('hits', 'total', 'value') || 0
|
217
|
-
results += hits.map { |hit| new(hit) }
|
218
|
-
break if results.size >= total
|
219
|
-
offset += hits.size
|
220
|
-
end
|
221
|
-
|
222
|
-
results
|
223
|
-
end
|
224
|
-
|
225
|
-
def initialize(es_response)
|
226
|
-
# TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
|
227
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
228
|
-
end
|
229
|
-
|
230
|
-
def terminate!(status, error = nil, ingestion_stats = {}, connector_metadata = {})
|
231
|
-
ingestion_stats ||= {}
|
232
|
-
ingestion_stats[:total_document_count] = ElasticConnectorActions.document_count(index_name)
|
233
|
-
doc = {
|
234
|
-
:last_seen => Time.now,
|
235
|
-
:completed_at => Time.now,
|
236
|
-
:status => status,
|
237
|
-
:error => error
|
238
|
-
}.merge(ingestion_stats)
|
239
|
-
doc[:canceled_at] = Time.now if status == Connectors::SyncStatus::CANCELED
|
240
|
-
doc[:metadata] = connector_metadata if connector_metadata&.any?
|
241
|
-
ElasticConnectorActions.update_job_fields(id, doc)
|
242
|
-
end
|
243
|
-
|
244
|
-
def seq_no
|
245
|
-
@elasticsearch_response[:_seq_no]
|
246
|
-
end
|
247
|
-
|
248
|
-
def primary_term
|
249
|
-
@elasticsearch_response[:_primary_term]
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|