teraslice 2.10.0 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/dist/src/interfaces.js +12 -0
  2. package/dist/src/lib/cluster/cluster_master.js +246 -0
  3. package/dist/src/lib/cluster/node_master.js +355 -0
  4. package/dist/src/lib/cluster/services/api.js +663 -0
  5. package/dist/src/lib/cluster/services/assets.js +226 -0
  6. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/index.js +192 -0
  7. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8s.js +481 -0
  8. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +414 -0
  9. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +59 -0
  10. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/utils.js +43 -0
  11. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js +192 -0
  12. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/interfaces.js +2 -0
  13. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.js +423 -0
  14. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sDeploymentResource.js +60 -0
  15. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sJobResource.js +55 -0
  16. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sResource.js +359 -0
  17. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sServiceResource.js +37 -0
  18. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sState.js +60 -0
  19. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/utils.js +170 -0
  20. package/dist/src/lib/cluster/services/cluster/backends/native/dispatch.js +13 -0
  21. package/dist/src/lib/cluster/services/cluster/backends/native/index.js +526 -0
  22. package/dist/src/lib/cluster/services/cluster/backends/native/messaging.js +547 -0
  23. package/dist/src/lib/cluster/services/cluster/backends/state-utils.js +26 -0
  24. package/dist/src/lib/cluster/services/cluster/index.js +17 -0
  25. package/dist/src/lib/cluster/services/execution.js +435 -0
  26. package/dist/src/lib/cluster/services/index.js +6 -0
  27. package/dist/src/lib/cluster/services/interfaces.js +2 -0
  28. package/dist/src/lib/cluster/services/jobs.js +454 -0
  29. package/dist/src/lib/config/default-sysconfig.js +26 -0
  30. package/dist/src/lib/config/index.js +22 -0
  31. package/dist/src/lib/config/schemas/system.js +360 -0
  32. package/dist/src/lib/storage/analytics.js +86 -0
  33. package/dist/src/lib/storage/assets.js +401 -0
  34. package/dist/src/lib/storage/backends/elasticsearch_store.js +494 -0
  35. package/dist/src/lib/storage/backends/mappings/analytics.js +50 -0
  36. package/dist/src/lib/storage/backends/mappings/asset.js +41 -0
  37. package/dist/src/lib/storage/backends/mappings/ex.js +62 -0
  38. package/dist/src/lib/storage/backends/mappings/job.js +38 -0
  39. package/dist/src/lib/storage/backends/mappings/state.js +38 -0
  40. package/dist/src/lib/storage/backends/s3_store.js +237 -0
  41. package/dist/src/lib/storage/execution.js +300 -0
  42. package/dist/src/lib/storage/index.js +7 -0
  43. package/dist/src/lib/storage/jobs.js +81 -0
  44. package/dist/src/lib/storage/state.js +255 -0
  45. package/dist/src/lib/utils/api_utils.js +157 -0
  46. package/dist/src/lib/utils/asset_utils.js +94 -0
  47. package/dist/src/lib/utils/date_utils.js +52 -0
  48. package/dist/src/lib/utils/encoding_utils.js +27 -0
  49. package/dist/src/lib/utils/events.js +4 -0
  50. package/dist/src/lib/utils/file_utils.js +124 -0
  51. package/dist/src/lib/utils/id_utils.js +15 -0
  52. package/dist/src/lib/utils/port_utils.js +32 -0
  53. package/dist/src/lib/workers/assets/index.js +3 -0
  54. package/dist/src/lib/workers/assets/loader-executable.js +40 -0
  55. package/dist/src/lib/workers/assets/loader.js +73 -0
  56. package/dist/src/lib/workers/assets/spawn.js +55 -0
  57. package/dist/src/lib/workers/context/execution-context.js +12 -0
  58. package/dist/src/lib/workers/context/terafoundation-context.js +8 -0
  59. package/dist/src/lib/workers/execution-controller/execution-analytics.js +188 -0
  60. package/dist/src/lib/workers/execution-controller/index.js +1024 -0
  61. package/dist/src/lib/workers/execution-controller/recovery.js +151 -0
  62. package/dist/src/lib/workers/execution-controller/scheduler.js +390 -0
  63. package/dist/src/lib/workers/execution-controller/slice-analytics.js +96 -0
  64. package/dist/src/lib/workers/helpers/job.js +80 -0
  65. package/dist/src/lib/workers/helpers/op-analytics.js +22 -0
  66. package/dist/src/lib/workers/helpers/terafoundation.js +34 -0
  67. package/dist/src/lib/workers/helpers/worker-shutdown.js +169 -0
  68. package/dist/src/lib/workers/metrics/index.js +108 -0
  69. package/dist/src/lib/workers/worker/index.js +378 -0
  70. package/dist/src/lib/workers/worker/slice.js +122 -0
  71. package/dist/test/config/schemas/system_schema-spec.js +37 -0
  72. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.js +316 -0
  73. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sResource-spec.js +795 -0
  74. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-multicluster-spec.js +67 -0
  75. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-spec.js +84 -0
  76. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/utils-spec.js +132 -0
  77. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.js +455 -0
  78. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sResource-v2-spec.js +818 -0
  79. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-multicluster-v2-spec.js +67 -0
  80. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-v2-spec.js +84 -0
  81. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/utils-v2-spec.js +320 -0
  82. package/dist/test/lib/cluster/services/cluster/backends/state-utils-spec.js +37 -0
  83. package/dist/test/node_master-spec.js +188 -0
  84. package/dist/test/services/api-spec.js +80 -0
  85. package/dist/test/services/assets-spec.js +158 -0
  86. package/dist/test/services/messaging-spec.js +440 -0
  87. package/dist/test/storage/assets_storage-spec.js +95 -0
  88. package/dist/test/storage/s3_store-spec.js +138 -0
  89. package/dist/test/test.config.js +8 -0
  90. package/dist/test/test.setup.js +6 -0
  91. package/dist/test/utils/api_utils-spec.js +86 -0
  92. package/dist/test/utils/asset_utils-spec.js +141 -0
  93. package/dist/test/utils/elastic_utils-spec.js +25 -0
  94. package/dist/test/workers/execution-controller/execution-controller-spec.js +371 -0
  95. package/dist/test/workers/execution-controller/execution-special-test-cases-spec.js +520 -0
  96. package/dist/test/workers/execution-controller/execution-test-cases-spec.js +338 -0
  97. package/dist/test/workers/execution-controller/recovery-spec.js +160 -0
  98. package/dist/test/workers/execution-controller/scheduler-spec.js +249 -0
  99. package/dist/test/workers/execution-controller/slice-analytics-spec.js +121 -0
  100. package/dist/test/workers/fixtures/ops/example-op/processor.js +20 -0
  101. package/dist/test/workers/fixtures/ops/example-op/schema.js +19 -0
  102. package/dist/test/workers/fixtures/ops/example-reader/fetcher.js +20 -0
  103. package/dist/test/workers/fixtures/ops/example-reader/schema.js +41 -0
  104. package/dist/test/workers/fixtures/ops/example-reader/slicer.js +37 -0
  105. package/dist/test/workers/fixtures/ops/new-op/processor.js +29 -0
  106. package/dist/test/workers/fixtures/ops/new-op/schema.js +18 -0
  107. package/dist/test/workers/fixtures/ops/new-reader/fetcher.js +19 -0
  108. package/dist/test/workers/fixtures/ops/new-reader/schema.js +23 -0
  109. package/dist/test/workers/fixtures/ops/new-reader/slicer.js +13 -0
  110. package/dist/test/workers/helpers/configs.js +130 -0
  111. package/dist/test/workers/helpers/execution-controller-helper.js +49 -0
  112. package/dist/test/workers/helpers/index.js +5 -0
  113. package/dist/test/workers/helpers/test-context.js +210 -0
  114. package/dist/test/workers/helpers/zip-directory.js +25 -0
  115. package/dist/test/workers/worker/slice-spec.js +333 -0
  116. package/dist/test/workers/worker/worker-spec.js +356 -0
  117. package/package.json +94 -93
  118. package/service.js +0 -0
@@ -0,0 +1,435 @@
1
+ import { Queue, TSError, getFullErrorStack, logError, get, withoutNil, isEmpty, multiFieldSort, isString, flatten, includes, cloneDeep } from '@terascope/utils';
2
+ import { makeLogger } from '../../workers/helpers/terafoundation.js';
3
+ /**
4
+ * New execution result
5
+ * @typedef NewExecutionResult
6
+ * @property {string} job_id
7
+ * @property {string} ex_id
8
+ */
9
+ /*
10
+ Execution Life Cycle for _status
11
+ pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
12
+ Exceptions
13
+ rejected - when an execution is rejected prior to scheduling
14
+ failed - when there is an error while the execution is running
15
+ aborted - when an execution was running at the point when the cluster shuts down
16
+ */
17
/**
 * Service run by the cluster master that manages executions: it queues
 * pending executions, asks the cluster service to allocate a slicer and
 * workers for them, and handles stop / pause / resume / recover requests.
 *
 * `initialize()` must be called before any method that touches
 * `executionStorage` or `clusterService`, since both are assigned there.
 */
export class ExecutionService {
    logger;
    /** Executions waiting to be handed to the cluster service. */
    pendingExecutionQueue = new Queue();
    /** True when `cluster_manager_type` is `native`. */
    isNative;
    context;
    clusterMasterServer;
    executionStorage;
    stateStorage;
    clusterService;
    /** Timer handle for the allocation loop (set by `initialize`). */
    allocateInterval;
    /** Timer handle for the stuck-execution reaper (set by `initialize`). */
    reapInterval;

    /**
     * @param {object} context terafoundation context (sysconfig, stores, services)
     * @param {object} deps
     * @param {object} deps.clusterMasterServer messaging server used to talk
     * to execution controllers
     */
    constructor(context, { clusterMasterServer }) {
        this.context = context;
        this.logger = makeLogger(context, 'execution_service');
        this.isNative = context.sysconfig.teraslice.cluster_manager_type === 'native';
        this.clusterMasterServer = clusterMasterServer;
    }

    /**
     * Wire up stores and services, re-enqueue any executions that are still
     * `pending` in storage and start the allocation and reaper timers.
     *
     * @throws {Error} if the required stores or the cluster service are missing
     * @returns {Promise<void>}
     */
    async initialize() {
        const { executionStorage, stateStorage } = this.context.stores;
        if (executionStorage == null || stateStorage == null) {
            throw new Error('Missing required stores');
        }

        const { clusterService } = this.context.services;
        if (clusterService == null) {
            throw new Error('Missing required services');
        }

        this.executionStorage = executionStorage;
        this.stateStorage = stateStorage;
        this.clusterService = clusterService;

        this.logger.info('execution service is initializing...');

        // listen for execution finished events
        // TODO: look closer at the types of the callback
        this.clusterMasterServer.onExecutionFinished(this._finishExecution.bind(this));

        // run the reaper once up front, before it is scheduled in the background
        await this.reapExecutions();

        const pending = await executionStorage.search('_status:pending', undefined, 10000, '_created:asc');
        for (const execution of pending) {
            this.logger.info(`enqueuing ${execution._status} execution: ${execution.ex_id}`);
            this.enqueue(execution);
        }

        const queueSize = this.pendingExecutionQueue.size();
        if (queueSize > 0) {
            this.logger.info(`execution queue initialization complete, ${queueSize} pending executions have been enqueued`);
        } else {
            this.logger.debug('execution queue initialization complete');
        }

        // the allocator is an arrow function closed over `this`, no bind needed
        this.allocateInterval = setInterval(this._executionAllocator(), 1000);
        this.reapInterval = setInterval(
            () => this.reapExecutions(),
            this.context.sysconfig.teraslice.shutdown_timeout || 30000
        );
    }

    /**
     * Add a copy of the execution to the pending queue.
     *
     * @param {object} ex execution record
     */
    enqueue(ex) {
        const size = this.pendingExecutionQueue.size();
        this.logger.debug(ex, `enqueueing execution to be processed (queue size ${size})`);
        // clone so later mutation by the caller cannot affect the queued record
        this.pendingExecutionQueue.enqueue(cloneDeep(ex));
    }

    getClusterAnalytics() {
        return this.clusterMasterServer.getClusterAnalytics();
    }

    /**
     * Poll the cluster state every 3 seconds until no active worker reports
     * the given execution, then try to persist the status.
     *
     * A failure to persist the status is logged, not thrown.
     *
     * @param {string} exId
     * @param {string} [_status='stopped'] status to set once the workers are gone
     * @returns {Promise<boolean>} always resolves to `true`
     */
    async waitForExecutionStatus(exId, _status) {
        const status = _status || 'stopped';

        const hasActiveWorkers = () => Object.values(this.clusterService.getClusterState())
            .some((node) => node.active.some(
                (worker) => Boolean(worker.ex_id) && worker.ex_id === exId
            ));

        while (hasActiveWorkers()) {
            await new Promise((resolve) => {
                setTimeout(resolve, 3000);
            });
        }

        try {
            await this.executionStorage.verifyStatusUpdate(exId, status);
            await this.executionStorage.setStatus(exId, status);
        } catch (err) {
            logError(this.logger, err, `failure setting execution, ${exId}, to ${status}`);
        }

        return true;
    }

    /**
     * Stop the background timers and, on native clustering only, stop every
     * living execution and wait for it to be marked `terminated`.
     *
     * @returns {Promise<void>}
     */
    async shutdown() {
        this.logger.info('shutting down');

        clearInterval(this.allocateInterval);
        clearInterval(this.reapInterval);
        this.allocateInterval = undefined;
        this.reapInterval = undefined;

        // only native clustering terminates executions here, so there is
        // no point in querying storage for the other cluster managers
        if (!this.isNative) {
            return;
        }

        const query = this.executionStorage.getLivingStatuses()
            .map((str) => `_status:${str}`)
            .join(' OR ');
        const executions = await this.executionStorage.search(query);
        const { hostname } = this.context.sysconfig.teraslice;

        await Promise.all(executions.map(async (execution) => {
            this.logger.warn(`marking execution ex_id: ${execution.ex_id}, job_id: ${execution.job_id} as terminated`);
            const exId = execution.ex_id;

            // need to exclude sending a stop to cluster master host, the shutdown event
            // has already been propagated this can cause a condition of it waiting for
            // stop to return but it already has which pauses this service shutdown
            await this.stopExecution(exId, { excludeNode: hostname });
            await this.waitForExecutionStatus(exId, 'terminated');
        }));
    }

    /**
     * List the active workers of every node whose state is `connected`.
     *
     * NOTE: each worker object from the cluster state is annotated in place
     * with the `node_id` and `hostname` of its node.
     *
     * @returns {object[]}
     */
    findAllWorkers() {
        return Object.values(this.clusterService.getClusterState())
            .filter((node) => node.state === 'connected')
            .flatMap((node) => node.active.filter(Boolean).map((worker) => {
                worker.node_id = node.node_id;
                worker.hostname = node.hostname;
                return worker;
            }));
    }

    async addWorkers(exId, workerNum) {
        const execution = await this.executionStorage.getActiveExecution(exId);
        return this.clusterService.addWorkers(execution, workerNum);
    }

    async setWorkers(exId, workerNum) {
        const execution = await this.executionStorage.getActiveExecution(exId);
        return this.clusterService.setWorkers(execution, workerNum);
    }

    async removeWorkers(exId, workerNum) {
        const execution = await this.executionStorage.getActiveExecution(exId);
        return this.clusterService.removeWorkers(execution.ex_id, workerNum);
    }

    /**
     * Check if the execution is in a terminal status
     *
     * @param {import('@terascope/job-components').ExecutionConfig} execution
     * @returns {boolean}
     */
    isExecutionTerminal(execution) {
        return this.executionStorage.getTerminalStatuses().includes(execution._status);
    }

    /**
     * Safely stop the execution without setting the ex status to stopping
     * or stopped. Registered as the "execution finished" callback.
     *
     * @param {string} exId
     * @param {Error|string} [err] terminal error reported for the execution
     * @returns {Promise<void>}
     */
    async _finishExecution(exId, err) {
        if (err) {
            const error = new TSError(err, {
                reason: `terminal error for execution: ${exId}, shutting down execution`,
                context: {
                    ex_id: exId,
                }
            });
            this.logger.error(error);
        }

        const execution = await this.getExecutionContext(exId);
        if (!execution) {
            throw new Error(`Execution: ${exId} was not found to finish execution`);
        }

        const status = execution._status;
        if (['stopping', 'stopped'].includes(status)) {
            this.logger.debug(`execution ${exId} is already stopping which means there is no need to stop the execution`);
            return;
        }

        const runningStatuses = this.executionStorage.getRunningStatuses();
        if (runningStatuses.includes(status)) {
            // This should never happen. If we get here with a running status
            // something has gone wrong. Mark execution as failed before shutdown.
            this.logger.warn(`Cluster_master is changing status of execution ${exId} from ${status} to failed`);
            await this.executionStorage.setStatus(
                exId,
                'failed',
                this.executionStorage.executionMetaData(null, getFullErrorStack(err))
            );
        }

        this.logger.debug(`execution ${exId} finished, shutting down execution`);

        try {
            await this.clusterService.stopExecution(exId);
        } catch (stopErr) {
            const stopError = new TSError(stopErr, {
                reason: 'error finishing the execution',
                context: {
                    ex_id: exId,
                }
            });
            logError(this.logger, stopError);
        }
    }

    /**
     * Stop an execution. Unless `options.force` is set, terminal executions
     * are left alone and the status is moved to `stopping` first.
     *
     * @param {string} exId
     * @param {object} [options] forwarded to the cluster service; `force`
     * skips the status checks
     * @throws {Error} if the execution cannot be found
     * @returns {Promise<void>}
     */
    async stopExecution(exId, options = {}) {
        const execution = await this.getExecutionContext(exId);
        if (!execution) {
            throw new Error(`Execution: ${exId} was not found`);
        }

        const isTerminal = this.isExecutionTerminal(execution);

        if (!options.force) {
            if (isTerminal) {
                this.logger.info(`execution ${exId} is in terminal status "${execution._status}", it cannot be stopped`);
                return;
            }

            if (execution._status === 'stopping') {
                this.logger.info('execution is already stopping...');
                // we are kicking this off in the background, not part of the promise chain
                this.waitForExecutionStatus(exId);
                return;
            }

            this.logger.debug(`stopping execution ${exId}...`, withoutNil(options));
            await this.executionStorage.setStatus(exId, 'stopping');
        } else {
            this.logger.info(`force stopping execution ${exId}...`, withoutNil(options));
        }

        await this.clusterService.stopExecution(exId, options);
        // we are kicking this off in the background, not part of the promise chain
        this.waitForExecutionStatus(exId);
    }

    /**
     * @param {string} exId
     * @throws {Error} if the execution controller is not connected
     * @returns {Promise<{ status: string }>}
     */
    async pauseExecution(exId) {
        const status = 'paused';
        const execution = await this.executionStorage.getActiveExecution(exId);

        if (!this.clusterMasterServer.isClientReady(execution.ex_id)) {
            throw new Error(`Execution ${execution.ex_id} is not available to pause`);
        }

        await this.clusterMasterServer.sendExecutionPause(exId);
        await this.executionStorage.setStatus(exId, status);
        return { status };
    }

    /**
     * @param {string} exId
     * @throws {Error} if the execution controller is not connected
     * @returns {Promise<{ status: string }>}
     */
    async resumeExecution(exId) {
        const status = 'running';
        const execution = await this.executionStorage.getActiveExecution(exId);

        if (!this.clusterMasterServer.isClientReady(execution.ex_id)) {
            throw new Error(`Execution ${execution.ex_id} is not available to resume`);
        }

        await this.clusterMasterServer.sendExecutionResume(execution.ex_id);
        await this.executionStorage.setStatus(execution.ex_id, status);
        return { status };
    }

    /**
     * Request analytics from the connected execution controllers.
     *
     * @param {string} [exId] if no exId is provided it returns all running executions
     * @throws {TSError} 404 when `exId` is given but no matching client is online
     * @returns {Promise<object[]>}
     */
    async getControllerStats(exId) {
        const specificId = exId ?? false;
        const exIds = await this.getRunningExecutions(exId);

        const clients = this.clusterMasterServer.onlineClients.filter(({ clientId }) => {
            if (specificId && clientId === specificId) {
                return true;
            }
            return exIds.includes(clientId);
        });

        if (clients.length === 0) {
            if (specificId) {
                throw new TSError(`Could not find active slicer for ex_id: ${specificId}`, {
                    statusCode: 404
                });
            }
            return [];
        }

        const results = await Promise.all(clients.map(async ({ clientId }) => {
            const msg = await this.clusterMasterServer.sendExecutionAnalyticsRequest(clientId);
            const payload = msg?.payload ?? {};
            return {
                ex_id: payload.ex_id,
                job_id: payload.job_id,
                name: payload.name,
                ...payload.stats
            };
        }));

        return multiFieldSort(results, ['name', 'started']).reverse();
    }

    /**
     * Create a new execution context
     *
     * @param {string|import('@terascope/job-components').JobConfigParams} job
     * @return {Promise<NewExecutionResult>}
     */
    async createExecutionContext(job) {
        const ex = await this.executionStorage.create(job);
        this.enqueue(ex);
        return { job_id: ex.job_id, ex_id: ex.ex_id };
    }

    /**
     * Load an execution record from storage.
     *
     * @param {string} exId
     * @throws {Error} if storage fails or yields no record; the error is
     * logged before being re-thrown
     * @returns {Promise<object>}
     */
    async getExecutionContext(exId) {
        try {
            // must be awaited: otherwise the not-found check tests a Promise
            // (always truthy) and storage rejections bypass the catch below
            const record = await this.executionStorage.get(exId);
            if (!record) {
                throw new Error(`Execution ${exId} was not found`);
            }
            return record;
        } catch (err) {
            logError(this.logger, err, `error getting execution context for ex: ${exId}`);
            throw err;
        }
    }

    /**
     * @param {string} exId
     * @throws {TSError} 409 when the execution is still in a running status
     */
    async softDeleteExecutionContext(exId) {
        const exIds = await this.getRunningExecutions(exId);
        if (exIds.length > 0) {
            throw new TSError(`Execution ${exId} is currently running, cannot delete a running execution.`, {
                statusCode: 409
            });
        }
        return this.executionStorage.softDelete(exId);
    }

    /**
     * Search for executions in a running status, newest first.
     *
     * @param {string} [exId] restrict the search to this execution
     * @returns {Promise<string[]>} matching execution ids
     */
    async getRunningExecutions(exId) {
        let query = this.executionStorage.getRunningStatuses()
            .map((state) => ` _status:${state} `)
            .join('OR');

        if (exId) {
            query = `ex_id:"${exId}" AND (${query.trim()})`;
        }

        const exs = await this.executionStorage.search(query, undefined, undefined, '_created:desc');
        return exs.map((ex) => ex.ex_id);
    }

    /**
     * Recover the execution
     *
     * @param {string|import('@terascope/job-components').ExecutionConfig} exIdOrEx
     * @param {import('@terascope/job-components').RecoveryCleanupType} [cleanupType]
     * @return {Promise<NewExecutionResult>}
     */
    async recoverExecution(exIdOrEx, cleanupType) {
        const recoverFromEx = typeof exIdOrEx === 'string'
            ? await this.getExecutionContext(exIdOrEx)
            : cloneDeep(exIdOrEx);

        if (!recoverFromEx) {
            throw new Error(`Could not find execution: ${exIdOrEx} to recover from`);
        }

        const ex = await this.executionStorage.createRecoveredExecution(recoverFromEx, cleanupType);
        this.enqueue(ex);
        return { job_id: ex.job_id, ex_id: ex.ex_id };
    }

    /**
     * Build the allocation function that `initialize` runs on a timer.
     *
     * Each run takes at most one execution off the pending queue, moves it
     * through `scheduling` -> `initializing` and allocates its slicer and
     * workers. A provisioning failure marks the execution as `failed`.
     * Only one allocation runs at a time; when it finishes the allocator
     * immediately re-runs itself to pick up the next queued execution.
     *
     * @returns {() => Promise<void>}
     */
    _executionAllocator() {
        let allocatingExecution = false;

        const allocator = async () => {
            const canAllocate = !allocatingExecution
                && this.pendingExecutionQueue.size() > 0
                && this.clusterService.readyForAllocation();
            if (!canAllocate) {
                return;
            }

            allocatingExecution = true;
            let execution = this.pendingExecutionQueue.dequeue();
            this.logger.info(`Scheduling execution: ${execution.ex_id}`);

            try {
                execution = await this.executionStorage.setStatus(execution.ex_id, 'scheduling');
                execution = await this.clusterService.allocateSlicer(execution);
                execution = await this.executionStorage.setStatus(execution.ex_id, 'initializing', {
                    slicer_port: execution.slicer_port,
                    slicer_hostname: execution.slicer_hostname
                });

                try {
                    await this.clusterService.allocateWorkers(execution, execution.workers);
                } catch (err) {
                    throw new TSError(err, {
                        reason: `Failure to allocateWorkers ${execution.ex_id}`
                    });
                }
            } catch (err) {
                const msg = `Failed to provision execution ${execution.ex_id}`;
                const error = new TSError(err, {
                    reason: msg
                });
                this.logger.warn(msg);

                try {
                    await this.executionStorage.setStatus(
                        execution.ex_id,
                        'failed',
                        this.executionStorage.executionMetaData(null, getFullErrorStack(error))
                    );
                } catch (failedErr) {
                    // report the status-update failure itself, not the provisioning error
                    this.logger.error(new TSError(failedErr, {
                        reason: 'Failure to set execution status to failed after provision failed'
                    }));
                }

                const clusteringType = this.context.sysconfig.teraslice.cluster_manager_type;
                if (clusteringType === 'kubernetes' || clusteringType === 'kubernetesV2') {
                    // Since this condition is only hit in cases where the pods
                    // are never scheduled, all this call to stopExecution
                    // accomplishes is to delete the k8s resources, which is
                    // probably just the k8s job for the execution controller.
                    // Calling delete on the worker deployment that doesn't
                    // exist is OK.
                    this.logger.warn(`Calling stopExecution on execution: ${execution.ex_id} to clean up k8s resources.`);
                    try {
                        await this.clusterService.stopExecution(execution.ex_id);
                    } catch (cleanupErr) {
                        // cleanup is best effort; the allocator runs from a timer so a
                        // throw here would only surface as an unhandled rejection
                        logError(this.logger, cleanupErr, `failure cleaning up k8s resources for execution ${execution.ex_id}`);
                    }
                }
            } finally {
                allocatingExecution = false;
                // pick up the next queued execution right away, in the background
                allocator().catch((err) => {
                    logError(this.logger, err, 'failure allocating execution');
                });
            }
        };

        return allocator;
    }

    /**
     * Mark executions that have been `stopping` for longer than the shutdown
     * timeout as `stopped`. Errors are logged, never thrown, so this is safe
     * to run from a timer.
     *
     * @returns {Promise<void>}
     */
    async reapExecutions() {
        // make sure to capture the error to avoid throwing an
        // unhandled rejection
        try {
            // sometimes in development an execution gets stuck in stopping
            // status since the process gets force killed before it
            // can be updated to stopped.
            const stopping = await this.executionStorage.search('_status:stopping');
            // same 30s fallback as the reaper interval; without it a missing
            // timeout makes the comparison below NaN and nothing is ever reaped
            const timeout = this.context.sysconfig.teraslice.shutdown_timeout ?? 30000;

            for (const execution of stopping) {
                const updatedAt = new Date(execution._updated).getTime();
                // Since we don't want to break executions that actually are "stopping"
                // we need to verify that the job has exceeded the shutdown timeout
                if (Date.now() > updatedAt + timeout) {
                    this.logger.info(`stopping stuck executing ${execution._status} execution: ${execution.ex_id}`);
                    await this.executionStorage.setStatus(execution.ex_id, 'stopped');
                }
            }
        } catch (err) {
            this.logger.error(err, 'failure reaping executions');
        }
    }

    async listResourcesForJobId(jobId) {
        return this.clusterService.listResourcesForJobId(jobId);
    }
}
435
+ //# sourceMappingURL=execution.js.map
@@ -0,0 +1,6 @@
1
+ export * from './cluster/index.js';
2
+ export * from './api.js';
3
+ export * from './assets.js';
4
+ export * from './execution.js';
5
+ export * from './jobs.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=interfaces.js.map