teraslice 0.87.1 → 0.88.0
This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- package/cluster-service.js +24 -18
- package/dist/src/index.js +42 -0
- package/package.json +11 -15
- package/service.js +4 -6
- package/worker-service.js +6 -6
- package/index.js +0 -21
- package/lib/cluster/cluster_master.js +0 -164
- package/lib/cluster/node_master.js +0 -393
- package/lib/cluster/services/api.js +0 -581
- package/lib/cluster/services/assets.js +0 -211
- package/lib/cluster/services/cluster/backends/kubernetes/deployments/worker.hbs +0 -86
- package/lib/cluster/services/cluster/backends/kubernetes/index.js +0 -225
- package/lib/cluster/services/cluster/backends/kubernetes/jobs/execution_controller.hbs +0 -69
- package/lib/cluster/services/cluster/backends/kubernetes/k8s.js +0 -450
- package/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +0 -443
- package/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +0 -67
- package/lib/cluster/services/cluster/backends/kubernetes/utils.js +0 -58
- package/lib/cluster/services/cluster/backends/native/index.js +0 -611
- package/lib/cluster/services/cluster/backends/native/messaging.js +0 -563
- package/lib/cluster/services/cluster/backends/state-utils.js +0 -49
- package/lib/cluster/services/cluster/index.js +0 -15
- package/lib/cluster/services/execution.js +0 -459
- package/lib/cluster/services/jobs.js +0 -303
- package/lib/config/default-sysconfig.js +0 -47
- package/lib/config/index.js +0 -32
- package/lib/config/schemas/system.js +0 -333
- package/lib/processors/save_file/index.js +0 -9
- package/lib/processors/save_file/processor.js +0 -17
- package/lib/processors/save_file/schema.js +0 -17
- package/lib/processors/script.js +0 -130
- package/lib/processors/stdout/index.js +0 -9
- package/lib/processors/stdout/processor.js +0 -19
- package/lib/processors/stdout/schema.js +0 -18
- package/lib/storage/analytics.js +0 -106
- package/lib/storage/assets.js +0 -275
- package/lib/storage/backends/elasticsearch_store.js +0 -567
- package/lib/storage/backends/mappings/analytics.json +0 -49
- package/lib/storage/backends/mappings/asset.json +0 -40
- package/lib/storage/backends/mappings/ex.json +0 -55
- package/lib/storage/backends/mappings/job.json +0 -31
- package/lib/storage/backends/mappings/state.json +0 -37
- package/lib/storage/execution.js +0 -331
- package/lib/storage/index.js +0 -16
- package/lib/storage/jobs.js +0 -97
- package/lib/storage/state.js +0 -302
- package/lib/utils/api_utils.js +0 -173
- package/lib/utils/asset_utils.js +0 -117
- package/lib/utils/date_utils.js +0 -58
- package/lib/utils/encoding_utils.js +0 -29
- package/lib/utils/events.js +0 -7
- package/lib/utils/file_utils.js +0 -118
- package/lib/utils/id_utils.js +0 -19
- package/lib/utils/port_utils.js +0 -83
- package/lib/workers/assets/loader.js +0 -109
- package/lib/workers/assets/spawn.js +0 -78
- package/lib/workers/context/execution-context.js +0 -16
- package/lib/workers/context/terafoundation-context.js +0 -10
- package/lib/workers/execution-controller/execution-analytics.js +0 -211
- package/lib/workers/execution-controller/index.js +0 -1033
- package/lib/workers/execution-controller/recovery.js +0 -188
- package/lib/workers/execution-controller/scheduler.js +0 -461
- package/lib/workers/execution-controller/slice-analytics.js +0 -115
- package/lib/workers/helpers/job.js +0 -93
- package/lib/workers/helpers/op-analytics.js +0 -22
- package/lib/workers/helpers/terafoundation.js +0 -43
- package/lib/workers/helpers/worker-shutdown.js +0 -187
- package/lib/workers/metrics/index.js +0 -139
- package/lib/workers/worker/index.js +0 -344
- package/lib/workers/worker/slice.js +0 -143
package/lib/cluster/services/execution.js
@@ -1,459 +0,0 @@
-'use strict';
-
-const sortBy = require('lodash/sortBy');
-const {
-    Queue,
-    TSError,
-    getFullErrorStack,
-    logError,
-    get,
-    withoutNil,
-    isEmpty,
-    isString,
-    flatten,
-    includes,
-    cloneDeep,
-} = require('@terascope/utils');
-const { setInterval } = require('timers');
-const { makeLogger } = require('../../workers/helpers/terafoundation');
-
-/**
- * New execution result
- * @typedef NewExecutionResult
- * @property {string} job_id
- * @property {string} ex_id
- */
-
-/*
- Execution Life Cycle for _status
- pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
- Exceptions
- rejected - when a execution is rejected prior to scheduling
- failed - when there is an error while the execution is running
- aborted - when a execution was running at the point when the cluster shutsdown
- */
-
-module.exports = function executionService(context, { clusterMasterServer }) {
-    const logger = makeLogger(context, 'execution_service');
-    const pendingExecutionQueue = new Queue();
-    const isNative = context.sysconfig.teraslice.cluster_manager_type === 'native';
-
-    let exStore;
-    let stateStore;
-    let clusterService;
-    let allocateInterval;
-    let reapInterval;
-
-    function enqueue(ex) {
-        const size = pendingExecutionQueue.size();
-        logger.debug(ex, `enqueueing execution to be processed (queue size ${size})`);
-        pendingExecutionQueue.enqueue(cloneDeep(ex));
-    }
-
-    function getClusterAnalytics() {
-        return clusterMasterServer.getClusterAnalytics();
-    }
-
-    async function waitForExecutionStatus(exId, _status) {
-        const status = _status || 'stopped';
-
-        return new Promise((resolve) => {
-            async function checkCluster() {
-                const state = clusterService.getClusterState();
-                const dict = Object.create(null);
-
-                Object.values(state).forEach((node) => node.active.forEach((worker) => {
-                    dict[worker.ex_id] = true;
-                }));
-
-                // if found, do not resolve
-                if (dict[exId]) {
-                    setTimeout(checkCluster, 3000);
-                    return;
-                }
-
-                try {
-                    await exStore.verifyStatusUpdate(exId, status);
-                    await exStore.setStatus(exId, status);
-                } catch (err) {
-                    logError(logger, err, `failure setting execution, ${exId}, to ${status}`);
-                } finally {
-                    resolve(true);
-                }
-            }
-            checkCluster();
-        });
-    }
-
-    async function shutdown() {
-        logger.info('shutting down');
-
-        clearInterval(allocateInterval);
-        clearInterval(reapInterval);
-        allocateInterval = null;
-        reapInterval = null;
-
-        const query = exStore.getLivingStatuses().map((str) => `_status:${str}`).join(' OR ');
-        const executions = await exStore.search(query);
-        await Promise.all(executions.map(async (execution) => {
-            if (!isNative) return;
-            logger.warn(`marking execution ex_id: ${execution.ex_id}, job_id: ${execution.job_id} as terminated`);
-            const exId = execution.ex_id;
-            const { hostname } = context.sysconfig.teraslice;
-
-            // need to exclude sending a stop to cluster master host, the shutdown event
-            // has already been propagated this can cause a condition of it waiting for
-            // stop to return but it already has which pauses this service shutdown
-            await stopExecution(exId, null, hostname);
-            await waitForExecutionStatus(exId, 'terminated');
-        }));
-    }
-
-    function findAllWorkers() {
-        return flatten(Object.values(clusterService.getClusterState())
-            .filter((node) => node.state === 'connected')
-            .map((node) => {
-                const workers = node.active.filter(Boolean);
-
-                return workers.map((worker) => {
-                    worker.node_id = node.node_id;
-                    worker.hostname = node.hostname;
-                    return worker;
-                });
-            }))
-            .filter(Boolean);
-    }
-
-    async function addWorkers(exId, workerNum) {
-        return exStore.getActiveExecution(exId)
-            .then((execution) => clusterService.addWorkers(execution, workerNum));
-    }
-
-    async function setWorkers(exId, workerNum) {
-        return exStore.getActiveExecution(exId)
-            .then((execution) => clusterService.setWorkers(execution, workerNum));
-    }
-
-    async function removeWorkers(exId, workerNum) {
-        return exStore.getActiveExecution(exId)
-            .then((execution) => clusterService.removeWorkers(execution.ex_id, workerNum));
-    }
-
-    /**
-     * Check if the execution is in a terminal status
-     *
-     * @param {import('@terascope/job-components').ExecutionConfig} execution
-     * @returns {boolean}
-     */
-    function isExecutionTerminal(execution) {
-        const terminalList = exStore.getTerminalStatuses();
-        return terminalList.find((tStat) => tStat === execution._status) != null;
-    }
-
-    // safely stop the execution without setting the ex status to stopping or stopped
-    async function finishExecution(exId, err) {
-        if (err) {
-            const error = new TSError(err, {
-                reason: `terminal error for execution: ${exId}, shutting down execution`,
-                context: {
-                    ex_id: exId,
-                }
-            });
-            logger.error(error);
-        }
-        const execution = await getExecutionContext(exId);
-        const status = execution._status;
-        if (['stopping', 'stopped'].includes(status)) {
-            logger.debug(`execution ${exId} is already stopping which means there is no need to stop the execution`);
-            return;
-        }
-
-        logger.debug(`execution ${exId} finished, shutting down execution`);
-        try {
-            await clusterService.stopExecution(exId);
-        } catch (stopErr) {
-            const stopError = new TSError(stopErr, {
-                reason: 'error finishing the execution',
-                context: {
-                    ex_id: exId,
-                }
-            });
-            logError(logger, stopError);
-        }
-    }
-
-    async function stopExecution(exId, timeout, excludeNode) {
-        const execution = await getExecutionContext(exId);
-        const isTerminal = isExecutionTerminal(execution._status);
-        if (isTerminal) {
-            logger.info(`execution ${exId} is in terminal status "${execution._status}", it cannot be stopped`);
-            return;
-        }
-
-        if (execution._status === 'stopping') {
-            logger.info('execution is already stopping...');
-            // we are kicking this off in the background, not part of the promise chain
-            waitForExecutionStatus(exId);
-            return;
-        }
-
-        logger.debug(`stopping execution ${exId}...`, withoutNil({ timeout, excludeNode }));
-        await exStore.setStatus(exId, 'stopping');
-        await clusterService.stopExecution(exId, timeout, excludeNode);
-        // we are kicking this off in the background, not part of the promise chain
-        waitForExecutionStatus(exId);
-    }
-
-    async function pauseExecution(exId) {
-        const status = 'paused';
-        const execution = await exStore.getActiveExecution(exId);
-        if (!clusterMasterServer.isClientReady(execution.ex_id)) {
-            throw new Error(`Execution ${execution.ex_id} is not available to pause`);
-        }
-        await clusterMasterServer.sendExecutionPause(exId);
-        await exStore.setStatus(exId, status);
-        return { status };
-    }
-
-    async function resumeExecution(exId) {
-        const status = 'running';
-        const execution = await exStore.getActiveExecution(exId);
-        if (!clusterMasterServer.isClientReady(execution.ex_id)) {
-            throw new Error(`Execution ${execution.ex_id} is not available to resume`);
-        }
-        await clusterMasterServer.sendExecutionResume(execution.ex_id);
-        await exStore.setStatus(execution.ex_id, status);
-        return { status };
-    }
-
-    async function getControllerStats(exId) {
-        // if no exId is provided it returns all running executions
-        const specificId = exId || false;
-        const exIds = await getRunningExecutions(exId);
-        const clients = clusterMasterServer.onlineClients.filter(({ clientId }) => {
-            if (specificId && clientId === specificId) return true;
-            return includes(exIds, clientId);
-        });
-
-        function formatResponse(msg) {
-            const payload = get(msg, 'payload', {});
-            const identifiers = {
-                ex_id: payload.ex_id,
-                job_id: payload.job_id,
-                name: payload.name
-            };
-            return Object.assign(identifiers, payload.stats);
-        }
-
-        if (isEmpty(clients)) {
-            if (specificId) {
-                throw new TSError(`Could not find active slicer for ex_id: ${specificId}`, {
-                    statusCode: 404
-                });
-            }
-            return [];
-        }
-
-        const promises = clients.map((client) => {
-            const { clientId } = client;
-            return clusterMasterServer
-                .sendExecutionAnalyticsRequest(clientId)
-                .then(formatResponse);
-        });
-
-        const results = await Promise.all(promises);
-        return sortBy(results, ['name', 'started']).reverse();
-    }
-
-    /**
-     * Create a new execution context
-     *
-     * @param {string|import('@terascope/job-components').JobConfig} job
-     * @return {Promise<NewExecutionResult>}
-     */
-    async function createExecutionContext(job) {
-        const ex = await exStore.create(job);
-        enqueue(ex);
-        return { job_id: ex.job_id, ex_id: ex.ex_id };
-    }
-
-    async function getExecutionContext(exId) {
-        return exStore.get(exId)
-            .catch((err) => logError(logger, err, `error getting execution context for ex: ${exId}`));
-    }
-
-    async function getRunningExecutions(exId) {
-        let query = exStore.getRunningStatuses().map((state) => ` _status:${state} `).join('OR');
-        if (exId) query = `ex_id:"${exId}" AND (${query.trim()})`;
-        const exs = await exStore.search(query, null, null, '_created:desc');
-        return exs.map((ex) => ex.ex_id);
-    }
-
-    /**
-     * Recover the execution
-     *
-     * @param {string|import('@terascope/job-components').ExecutionConfig} exIdOrEx
-     * @param {import('@terascope/job-components').RecoveryCleanupType} [cleanupType]
-     * @return {Promise<NewExecutionResult>}
-     */
-    async function recoverExecution(exIdOrEx, cleanupType) {
-        const recoverFromEx = isString(exIdOrEx)
-            ? await getExecutionContext(exIdOrEx)
-            : cloneDeep(exIdOrEx);
-
-        const ex = await exStore.createRecoveredExecution(recoverFromEx, cleanupType);
-        enqueue(ex);
-        return { job_id: ex.job_id, ex_id: ex.ex_id };
-    }
-
-    function _executionAllocator() {
-        let allocatingExecution = false;
-        const { readyForAllocation } = clusterService;
-        return async function allocator() {
-            const canAllocate = !allocatingExecution
-                && pendingExecutionQueue.size() > 0
-                && readyForAllocation();
-            if (!canAllocate) return;
-
-            allocatingExecution = true;
-            let execution = pendingExecutionQueue.dequeue();
-
-            logger.info(`Scheduling execution: ${execution.ex_id}`);
-
-            try {
-                execution = await exStore.setStatus(execution.ex_id, 'scheduling');
-
-                execution = await clusterService.allocateSlicer(execution);
-
-                execution = await exStore.setStatus(execution.ex_id, 'initializing', {
-                    slicer_port: execution.slicer_port,
-                    slicer_hostname: execution.slicer_hostname
-                });
-
-                try {
-                    await clusterService.allocateWorkers(execution, execution.workers);
-                } catch (err) {
-                    throw new TSError(err, {
-                        reason: `Failure to allocateWorkers ${execution.ex_id}`
-                    });
-                }
-            } catch (err) {
-                const msg = `Failed to provision execution ${execution.ex_id}`;
-                const error = new TSError(err, {
-                    reason: msg
-                });
-                logger.warn(msg);
-
-                try {
-                    await exStore.setStatus(
-                        execution.ex_id,
-                        'failed',
-                        exStore.executionMetaData(null, getFullErrorStack(error))
-                    );
-                } catch (failedErr) {
-                    logger.error(new TSError(err, {
-                        reason: 'Failure to set execution status to failed after provision failed'
-                    }));
-                }
-
-                if (context.sysconfig.teraslice.cluster_manager_type === 'kubernetes') {
-                    // Since this condition is only hit in cases where the pods
-                    // are never scheduled, all this call to stopExecution
-                    // accomplishes is to delete the k8s resources, which is
-                    // probably just the k8s job for the execution controller.
-                    // Calling delete on the worker deployment that doesn't
-                    // exist is OK.
-                    logger.warn(`Calling stopExecution on execution: ${execution.ex_id} to clean up k8s resources.`);
-                    await clusterService.stopExecution(execution.ex_id);
-                }
-            } finally {
-                allocatingExecution = false;
-                allocator();
-            }
-        };
-    }
-
-    async function reapExecutions() {
-        // make sure to capture the error avoid throwing an
-        // unhandled rejection
-        try {
-            // sometimes in development an execution gets stuck in stopping
-            // status since the process gets force killed in before it
-            // can be updated to stopped.
-            const stopping = await exStore.search('_status:stopping');
-            for (const execution of stopping) {
-                const updatedAt = new Date(execution._updated).getTime();
-                const updatedWithTimeout = updatedAt + context.sysconfig.teraslice.shutdown_timeout;
-                // Since we don't want to break executions that actually are "stopping"
-                // we need to verify that the job has exceeded the shutdown timeout
-                if (Date.now() > updatedWithTimeout) {
-                    logger.info(`stopping stuck executing ${execution._status} execution: ${execution.ex_id}`);
-                    await exStore.setStatus(execution.ex_id, 'stopped');
-                }
-            }
-        } catch (err) {
-            logger.error(err, 'failure reaping executions');
-        }
-    }
-
-    async function initialize() {
-        exStore = context.stores.execution;
-        stateStore = context.stores.state;
-        if (exStore == null || stateStore == null) {
-            throw new Error('Missing required stores');
-        }
-
-        clusterService = context.services.cluster;
-        if (clusterService == null) {
-            throw new Error('Missing required services');
-        }
-
-        logger.info('execution service is initializing...');
-
-        // listen for an execution finished events
-        clusterMasterServer.onExecutionFinished(finishExecution);
-
-        // lets call this before calling it
-        // in the background
-        await reapExecutions();
-
-        const pending = await exStore.search('_status:pending', null, 10000, '_created:asc');
-        for (const execution of pending) {
-            logger.info(`enqueuing ${execution._status} execution: ${execution.ex_id}`);
-            enqueue(execution);
-        }
-
-        const queueSize = pendingExecutionQueue.size();
-
-        if (queueSize > 0) {
-            logger.info(`execution queue initialization complete, ${pendingExecutionQueue.size()} pending executions have been enqueued`);
-        } else {
-            logger.debug('execution queue initialization complete');
-        }
-
-        allocateInterval = setInterval(_executionAllocator(), 1000);
-        reapInterval = setInterval(
-            reapExecutions,
-            context.sysconfig.teraslice.shutdown_timeout || 30000
-        );
-    }
-
-    return {
-        getClusterAnalytics,
-        getControllerStats,
-        findAllWorkers,
-        shutdown,
-        initialize,
-        stopExecution,
-        pauseExecution,
-        resumeExecution,
-        recoverExecution,
-        removeWorkers,
-        addWorkers,
-        setWorkers,
-        createExecutionContext,
-        getExecutionContext,
-        isExecutionTerminal,
-        waitForExecutionStatus
-    };
-};
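
The life-cycle comment at the top of the removed file documents the `_status` state machine that this service drives through `exStore.setStatus` (the allocator also passes through an 'initializing' status between 'scheduling' and 'running'). For illustration only, a minimal sketch of those transitions as a lookup table; the status names come from that comment and the code above, but the table itself is a hypothetical reading of them, not an API exported by teraslice:

'use strict';

// Hypothetical transition table derived from the removed file's life-cycle
// comment; an illustration of that comment, not teraslice code.
const transitions = {
    pending: ['scheduling', 'rejected'],
    scheduling: ['initializing', 'failed'],
    initializing: ['running', 'failed'],
    running: ['paused', 'stopping', 'completed', 'failed', 'aborted'],
    paused: ['running', 'stopping'],
    stopping: ['stopped'],
};

function canTransition(from, to) {
    return (transitions[from] || []).includes(to);
}

console.log(canTransition('running', 'paused')); // true
console.log(canTransition('paused', 'completed')); // false

Statuses with no outgoing transitions here ('stopped', 'completed', 'rejected', 'failed', 'aborted') correspond to what the code calls terminal statuses, which is why `stopExecution` refuses to act on an execution whose status is terminal.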