teraslice 0.87.0 → 0.88.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/cluster-service.js +24 -18
  2. package/dist/src/index.js +42 -0
  3. package/package.json +11 -15
  4. package/service.js +4 -6
  5. package/worker-service.js +6 -6
  6. package/index.js +0 -21
  7. package/lib/cluster/cluster_master.js +0 -164
  8. package/lib/cluster/node_master.js +0 -393
  9. package/lib/cluster/services/api.js +0 -581
  10. package/lib/cluster/services/assets.js +0 -211
  11. package/lib/cluster/services/cluster/backends/kubernetes/deployments/worker.hbs +0 -86
  12. package/lib/cluster/services/cluster/backends/kubernetes/index.js +0 -225
  13. package/lib/cluster/services/cluster/backends/kubernetes/jobs/execution_controller.hbs +0 -69
  14. package/lib/cluster/services/cluster/backends/kubernetes/k8s.js +0 -450
  15. package/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +0 -443
  16. package/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +0 -67
  17. package/lib/cluster/services/cluster/backends/kubernetes/utils.js +0 -58
  18. package/lib/cluster/services/cluster/backends/native/index.js +0 -611
  19. package/lib/cluster/services/cluster/backends/native/messaging.js +0 -563
  20. package/lib/cluster/services/cluster/backends/state-utils.js +0 -49
  21. package/lib/cluster/services/cluster/index.js +0 -15
  22. package/lib/cluster/services/execution.js +0 -459
  23. package/lib/cluster/services/jobs.js +0 -303
  24. package/lib/config/default-sysconfig.js +0 -47
  25. package/lib/config/index.js +0 -32
  26. package/lib/config/schemas/system.js +0 -333
  27. package/lib/processors/save_file/index.js +0 -9
  28. package/lib/processors/save_file/processor.js +0 -17
  29. package/lib/processors/save_file/schema.js +0 -17
  30. package/lib/processors/script.js +0 -130
  31. package/lib/processors/stdout/index.js +0 -9
  32. package/lib/processors/stdout/processor.js +0 -19
  33. package/lib/processors/stdout/schema.js +0 -18
  34. package/lib/storage/analytics.js +0 -106
  35. package/lib/storage/assets.js +0 -275
  36. package/lib/storage/backends/elasticsearch_store.js +0 -567
  37. package/lib/storage/backends/mappings/analytics.json +0 -49
  38. package/lib/storage/backends/mappings/asset.json +0 -40
  39. package/lib/storage/backends/mappings/ex.json +0 -55
  40. package/lib/storage/backends/mappings/job.json +0 -31
  41. package/lib/storage/backends/mappings/state.json +0 -37
  42. package/lib/storage/execution.js +0 -331
  43. package/lib/storage/index.js +0 -16
  44. package/lib/storage/jobs.js +0 -97
  45. package/lib/storage/state.js +0 -302
  46. package/lib/utils/api_utils.js +0 -173
  47. package/lib/utils/asset_utils.js +0 -117
  48. package/lib/utils/date_utils.js +0 -58
  49. package/lib/utils/encoding_utils.js +0 -29
  50. package/lib/utils/events.js +0 -7
  51. package/lib/utils/file_utils.js +0 -118
  52. package/lib/utils/id_utils.js +0 -19
  53. package/lib/utils/port_utils.js +0 -83
  54. package/lib/workers/assets/loader.js +0 -109
  55. package/lib/workers/assets/spawn.js +0 -78
  56. package/lib/workers/context/execution-context.js +0 -16
  57. package/lib/workers/context/terafoundation-context.js +0 -10
  58. package/lib/workers/execution-controller/execution-analytics.js +0 -211
  59. package/lib/workers/execution-controller/index.js +0 -1033
  60. package/lib/workers/execution-controller/recovery.js +0 -188
  61. package/lib/workers/execution-controller/scheduler.js +0 -461
  62. package/lib/workers/execution-controller/slice-analytics.js +0 -115
  63. package/lib/workers/helpers/job.js +0 -93
  64. package/lib/workers/helpers/op-analytics.js +0 -22
  65. package/lib/workers/helpers/terafoundation.js +0 -43
  66. package/lib/workers/helpers/worker-shutdown.js +0 -187
  67. package/lib/workers/metrics/index.js +0 -139
  68. package/lib/workers/worker/index.js +0 -344
  69. package/lib/workers/worker/slice.js +0 -143
package/lib/cluster/services/cluster/backends/native/index.js
@@ -1,611 +0,0 @@
-'use strict';
-
-const _ = require('lodash');
-const {
-    Queue, TSError, getFullErrorStack, pDelay, cloneDeep
-} = require('@terascope/utils');
-const { makeLogger } = require('../../../../../workers/helpers/terafoundation');
-const stateUtils = require('../state-utils');
-const Messaging = require('./messaging');
-
-/*
- Execution Life Cycle for _status
- pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
- Exceptions
- rejected - when a job is rejected prior to scheduling
- failed - when there is an error while the job is running
- aborted - when a job was running at the point when the cluster shutsdown
- */
-
-module.exports = function nativeClustering(context, clusterMasterServer) {
-    const events = context.apis.foundation.getSystemEvents();
-    const logger = makeLogger(context, 'native_cluster_service');
-    const pendingWorkerRequests = new Queue();
-    const nodeDisconnectTimeout = context.sysconfig.teraslice.node_disconnect_timeout;
-    const nodeStateInterval = context.sysconfig.teraslice.node_state_interval;
-    const slicerAllocationAttempts = context.sysconfig.teraslice.slicer_allocation_attempts;
-    const clusterState = {};
-    const messaging = Messaging(context, logger);
-
-    let exStore;
-    let clusterStateInterval;
-
-    // temporary holding spot used to attach nodes that are non responsive or
-    // disconnect before final cleanup
-    const droppedNodes = {};
-
-    messaging.register({
-        event: 'node:online',
-        identifier: 'node_id',
-        callback: (data, nodeId) => {
-            logger.info(`node ${nodeId} has connected`);
-            // if a reconnect happens stop timer
-            if (droppedNodes[nodeId]) {
-                clearTimeout(droppedNodes[nodeId]);
-                delete droppedNodes[nodeId];
-            }
-            logger.trace(`node ${nodeId} has state:`, data.payload);
-            clusterState[nodeId] = data.payload;
-            // if new node comes online, check if jobs need more workers
-            events.emit('cluster:available_workers');
-        }
-    });
-
-    messaging.register({
-        event: 'node:state',
-        callback: (stateMsg) => {
-            const data = stateMsg.payload;
-            clusterState[data.node_id] = data;
-            logger.trace(`node ${data.node_id} state is being updated`, data);
-            // check to see if we can provision any additional workers
-            events.emit('cluster:available_workers');
-        }
-    });
-
-    messaging.register({
-        event: 'network:error',
-        callback: (err) => logger.error(err, 'cluster_master had an error with one of its connections')
-    });
-
-    messaging.register({
-        event: 'network:disconnect',
-        identifier: 'node_id',
-        callback: (msg, nodeId) => {
-            if (!clusterState[nodeId]) return;
-
-            if (clusterState[nodeId].active.length === 0) {
-                logger.warn(`node ${nodeId} has disconnected`);
-                delete clusterState[nodeId];
-            } else {
-                clusterState[nodeId].state = 'disconnected';
-                const timer = setTimeout(() => {
-                    _cleanUpNode(nodeId);
-                }, nodeDisconnectTimeout);
-
-                droppedNodes[nodeId] = timer;
-            }
-        }
-    });
-
-    function _cleanUpNode(nodeId) {
-        // check workers and slicers
-        const node = _checkNode(clusterState[nodeId]);
-        // if disconnected node had a slicer, we stop the execution of each slicer on it
-        // and mark it as failed
-        if (node.hasSlicer) {
-            _.forIn(node.slicerExecutions, async (exId) => {
-                const errMsg = `node ${nodeId} has been disconnected from cluster_master past the allowed timeout, it has an active slicer for execution: ${exId} which will be marked as terminated and shut down`;
-                logger.error(errMsg);
-                const metaData = exStore.executionMetaData(null, errMsg);
-                pendingWorkerRequests.remove(exId, 'ex_id');
-
-                try {
-                    await exStore.setStatus(exId, 'terminated', metaData);
-                } catch (err) {
-                    logger.error(err, `failure to set execution ${exId} status to terminated`);
-                } finally {
-                    messaging.broadcast('cluster:execution:stop', { ex_id: exId });
-                }
-            });
-        }
-        // for any other worker not part of what is being shutdown, we attempt to reallocate
-        _.forIn(node.workerExecutions, async (__, exId) => {
-            // looking for unique ex_id's not in slicerJobID
-            if (!node.slicerExecutions[exId]) {
-                const activeWorkers = clusterState[nodeId].active;
-                const numOfWorkers = activeWorkers.filter((worker) => worker.ex_id === exId).length;
-
-                try {
-                    const execution = await exStore.getActiveExecution(exId);
-                    addWorkers(execution, numOfWorkers);
-                } catch (err) {
-                    logger.error(err, `failure to add workers to execution ${exId}`);
-                }
-            }
-        });
-
-        // cleanup key so we don't get ever growing obj
-        delete droppedNodes[nodeId];
-        delete clusterState[nodeId];
-    }
-
-    function getClusterState() {
-        return cloneDeep(clusterState);
-    }
-
-    function _checkNode(node) {
-        const obj = {
-            hasSlicer: false,
-            numOfSlicers: 0,
-            slicerExecutions: {},
-            workerExecutions: {},
-            numOfWorkers: 0,
-            id: node.id,
-            available: node.available
-        };
-
-        return node.active.reduce((prev, curr) => {
-            if (curr.assignment === 'execution_controller') {
-                prev.hasSlicer = true;
-                prev.numOfSlicers += 1;
-                prev.slicerExecutions[curr.ex_id] = curr.ex_id;
-            }
-
-            if (curr.assignment === 'worker') {
-                prev.numOfWorkers += 1;
-                // if not resgistered, set it to one, if so then increment it
-                if (!prev.workerExecutions[curr.ex_id]) {
-                    prev.workerExecutions[curr.ex_id] = 1;
-                } else {
-                    prev.workerExecutions[curr.ex_id] += 1;
-                }
-            }
-
-            return prev;
-        }, obj);
-    }
-
-    function _findNodeForSlicer(stateArray, errorNodes) {
-        let slicerNode = null;
-        for (let i = 0; i < stateArray.length; i += 1) {
-            if (stateArray[i].state === 'connected' && stateArray[i].available > 0 && !errorNodes[stateArray[i].node_id]) {
-                const node = _checkNode(stateArray[i]);
-
-                if (!node.hasSlicer) {
-                    slicerNode = stateArray[i].node_id;
-                    break;
-                }
-            }
-        }
-
-        // if all nodes have a slicer
-        if (!slicerNode) {
-            // list is already sorted by num available since stateArray is sorted
-            slicerNode = stateArray[0].node_id;
-        }
-
-        return slicerNode;
-    }
-
-    function _findNodesForExecution(exId, slicerOnly) {
-        const nodes = [];
-        _.forOwn(clusterState, (node) => {
-            if (node.state !== 'disconnected') {
-                const hasJob = node.active.filter((worker) => {
-                    if (slicerOnly) {
-                        return worker.ex_id === exId && worker.assignment === 'execution_controller';
-                    }
-
-                    return worker.ex_id === exId;
-                });
-
-                if (hasJob.length >= 1) {
-                    nodes.push({
-                        node_id: node.node_id,
-                        ex_id: exId,
-                        hostname: node.hostname,
-                        workers: hasJob
-                    });
-                }
-            }
-        });
-
-        return nodes;
-    }
-
-    function _availableWorkers(all, forceCheck) {
-        let num = 0;
-        // determine which key to search for in cluster state
-        if (pendingWorkerRequests.size() === 0 || forceCheck) {
-            const key = all ? 'total' : 'available';
-
-            _.forOwn(clusterState, (node) => {
-                if (node.state === 'connected') {
-                    num += node[key];
-                }
-            });
-        }
-
-        return num;
-    }
-
-    function _findPort(nodeId) {
-        return messaging.send({
-            to: 'node_master',
-            address: nodeId,
-            message: 'cluster:node:get_port',
-            response: true
-        });
-    }
-
-    function _makeDispatch() {
-        const methods = {};
-        const dispatch = {};
-
-        methods.set = (nodeId, numOfWorkers) => {
-            if (dispatch[nodeId]) {
-                dispatch[nodeId] += numOfWorkers;
-            } else {
-                dispatch[nodeId] = numOfWorkers;
-            }
-        };
-        methods.getDispatch = () => dispatch;
-
-        return methods;
-    }
-
-    // designed to allocate additional workers, not any future slicers
-    function allocateWorkers(execution, numOfWorkersRequested) {
-        const exId = execution.ex_id;
-        const jobId = execution.job_id;
-        const jobStr = JSON.stringify(execution);
-        const sortedNodes = _.orderBy(clusterState, 'available', 'desc');
-        let workersRequested = numOfWorkersRequested;
-        let availWorkers = _availableWorkers(false, true);
-
-        const dispatch = _makeDispatch();
-
-        while (workersRequested > 0 && availWorkers > 0) {
-            for (let i = 0; i < sortedNodes.length; i += 1) {
-                // each iteration check if it can allocate
-                if (workersRequested > 0 && availWorkers > 0) {
-                    if (sortedNodes[i].available >= 1) {
-                        dispatch.set(sortedNodes[i].node_id, 1);
-                        availWorkers -= 1;
-                        workersRequested -= 1;
-                    }
-                } else {
-                    break;
-                }
-            }
-        }
-        // if left over worker requests, enqueue them, queue works based off of id
-        // so it redundantly references ex_id
-
-        const workerData = {
-            job: jobStr,
-            id: exId,
-            ex_id: exId,
-            job_id: jobId,
-            workers: 1,
-            assignment: 'worker'
-        };
-
-        while (workersRequested > 0) {
-            logger.trace(`adding worker to pending queue for ex: ${exId}`);
-            pendingWorkerRequests.enqueue(workerData);
-            workersRequested -= 1;
-        }
-        const results = [];
-
-        _.forOwn(dispatch.getDispatch(), (workerCount, nodeId) => {
-            const requestedWorkersData = {
-                job: jobStr,
-                id: exId,
-                ex_id: exId,
-                job_id: jobId,
-                workers: workerCount,
-                assignment: 'worker'
-            };
-
-            const createRequest = messaging.send({
-                to: 'node_master',
-                address: nodeId,
-                message: 'cluster:workers:create',
-                payload: requestedWorkersData,
-                response: true
-            }).then((msg) => {
-                const createdWorkers = _.get(msg, 'payload.createdWorkers');
-                if (!_.isInteger(createdWorkers)) {
-                    logger.error(`malformed response from create workers request to node ${nodeId}`, msg);
-                    return;
-                }
-                if (createdWorkers < workerCount) {
-                    logger.warn(`node ${nodeId} was only able to allocate ${createdWorkers} the request worker count of ${workerCount}, enqueing the remainder`);
-                    const newWorkersRequest = _.cloneDeep(requestedWorkersData);
-                    newWorkersRequest.workers = workerCount - createdWorkers;
-                    pendingWorkerRequests.enqueue(newWorkersRequest);
-                } else {
-                    logger.debug(`node ${nodeId} allocated ${createdWorkers}`);
-                }
-            }).catch((err) => {
-                logger.error(err, `An error has occurred in allocating : ${workerCount} workers to node : ${nodeId}, the worker request has been enqueued`);
-                pendingWorkerRequests.enqueue(requestedWorkersData);
-            });
-
-            results.push(createRequest);
-        });
-
-        // this will resolve successfully if one worker was actually allocated
-        return Promise.all(results);
-    }
-
-    async function _createSlicer(ex, errorNodes) {
-        const execution = cloneDeep(ex);
-        const sortedNodes = _.orderBy(clusterState, 'available', 'desc');
-        const slicerNodeID = _findNodeForSlicer(sortedNodes, errorNodes);
-
-        // need to mutate job so that workers will know the specific port and
-        // hostname of the created slicer
-        const portObj = await _findPort(slicerNodeID);
-        execution.slicer_port = portObj.port;
-        execution.slicer_hostname = clusterState[slicerNodeID].hostname;
-
-        logger.debug(`node ${clusterState[slicerNodeID].hostname} has been elected for slicer, listening on port: ${portObj.port}`);
-
-        const exId = execution.ex_id;
-        const jobId = execution.job_id;
-        const jobStr = JSON.stringify(execution);
-
-        const data = {
-            job: jobStr,
-            ex_id: exId,
-            job_id: jobId,
-            workers: 1,
-            slicer_port: portObj.port,
-            node_id: slicerNodeID,
-            assignment: 'execution_controller'
-        };
-
-        try {
-            await messaging.send({
-                to: 'node_master',
-                address: slicerNodeID,
-                message: 'cluster:execution_controller:create',
-                payload: data,
-                response: true
-            });
-            return execution;
-        } catch (err) {
-            const error = new TSError(err, {
-                reason: `failed to allocate execution_controller to ${slicerNodeID}`
-            });
-            logger.error(error);
-            errorNodes[slicerNodeID] = getFullErrorStack(error);
-            throw err;
-        }
-    }
-
-    async function allocateSlicer(ex) {
-        let retryCount = 0;
-        const errorNodes = {};
-
-        async function _allocateSlicer() {
-            try {
-                return await _createSlicer(ex, errorNodes);
-            } catch (err) {
-                retryCount += 1;
-                if (retryCount >= slicerAllocationAttempts) {
-                    throw new Error(`Failed to allocate execution_controller to nodes: ${JSON.stringify(errorNodes)}`);
-                } else {
-                    await pDelay(100);
-                    return _allocateSlicer();
-                }
-            }
-        }
-        return _allocateSlicer();
-    }
-
-    const schedulePendingRequests = _.debounce(() => {
-        if (pendingWorkerRequests.size() && _availableWorkers(false, true) >= 1) {
-            const requestedWorker = pendingWorkerRequests.dequeue();
-            const job = JSON.parse(requestedWorker.job);
-
-            allocateWorkers(job, requestedWorker.workers)
-                .catch((err) => {
-                    const error = new TSError(err, {
-                        reason: 'Error processing pending requests'
-                    });
-                    logger.error(error);
-                });
-        }
-    }, 500, { leading: false, trailing: true });
-
-    events.on('cluster:available_workers', schedulePendingRequests);
-
-    function addWorkers(execution, workerNum) {
-        const workerData = {
-            job: JSON.stringify(execution),
-            id: execution.ex_id,
-            ex_id: execution.ex_id,
-            job_id: execution.job_id,
-            workers: workerNum,
-            assignment: 'worker'
-        };
-        pendingWorkerRequests.enqueue(workerData);
-        return { action: 'enqueued', ex_id: execution.ex_id, workerNum };
-    }
-
-    function setWorkers(execution, workerNum) {
-        const totalWorker = stateUtils.findWorkersByExecutionID(
-            clusterState,
-            execution.ex_id
-        ).length;
-        if (totalWorker > workerNum) {
-            const removedWorkersCount = totalWorker - workerNum;
-            return removeWorkers(execution.ex_id, removedWorkersCount);
-        }
-        if (totalWorker < workerNum) {
-            const addWorkersCount = workerNum - totalWorker;
-            return addWorkers(execution, addWorkersCount);
-        }
-        // if they are equal then no work needs to be done
-        return { action: 'set', ex_id: execution.ex_id, workerNum };
-    }
-
-    function removeWorkers(exId, workerNum) {
-        const dispatch = _makeDispatch();
-        const workers = stateUtils.findWorkersByExecutionID(clusterState, exId);
-        let workerCount = workerNum;
-        const workersData = workers.reduce((prev, curr) => {
-            if (!prev[curr.node_id]) {
-                prev[curr.node_id] = 1;
-            } else {
-                prev[curr.node_id] += 1;
-            }
-            prev._total += 1;
-
-            return prev;
-        }, { _total: 0 });
-
-        if (workerNum >= workersData._total) {
-            const errMsg = `workers to be removed: ${workerNum} cannot be >= to current workers: ${workersData._total}`;
-            const error = new TSError(errMsg, {
-                statusCode: 400,
-            });
-            logger.error(error);
-            return Promise.reject(error);
-        }
-
-        function stateForDispatch(__, key) {
-            if (key !== '_total') {
-                if (workersData[key] >= 1 && workerCount > 0) {
-                    dispatch.set(key, 1);
-                    workersData[key] -= 1;
-                    workerCount -= 1;
-                }
-            }
-        }
-
-        while (workerCount) {
-            _.forOwn(workersData, stateForDispatch);
-        }
-
-        const nodes = dispatch.getDispatch();
-        const results = _.map(nodes, (val, key) => messaging.send({
-            to: 'node_master',
-            address: key,
-            message: 'cluster:workers:remove',
-            ex_id: exId,
-            payload: { workers: val },
-            response: true
-        }));
-
-        return Promise.all(results)
-            .then(() => ({ action: 'remove', ex_id: exId, workerNum }))
-            .catch((err) => {
-                const error = new TSError(err, {
-                    reason: `Error while releasing workers from job ${exId}`
-                });
-                logger.error(error);
-                return Promise.reject(error);
-            });
-    }
-
-    function _notifyNodesWithExecution(exId, messageData, excludeNode) {
-        return new Promise(((resolve, reject) => {
-            let nodes = _findNodesForExecution(exId);
-            if (excludeNode) {
-                nodes = nodes.filter((node) => node.hostname !== excludeNode);
-            } else if (messageData.message !== 'cluster:execution:stop' && nodes.length === 0) {
-                // exclude node is only in regards to a shutdown on the cluster_master, which
-                // already receives the shutdown notice so it can be empty, in all other
-                // circumstances if the node list length is zero then reject
-                const error = new Error(`Could not find active execution processes for ex_id: ${exId}`);
-                error.code = 404;
-                reject(error);
-                return;
-            }
-
-            const promises = nodes.map((node) => {
-                const sendingMsg = Object.assign(messageData, {
-                    to: 'node_master',
-                    address: node.node_id,
-                    ex_id: exId,
-                    response: false
-                });
-
-                logger.trace(`notifying node ${node.node_id} to stop execution ${exId}`, sendingMsg);
-
-                return messaging.send(sendingMsg);
-            });
-
-            Promise.all(promises)
-                .then(() => {
-                    resolve(true);
-                })
-                .catch((err) => {
-                    const error = new Error(`Failure to notify node with execution ${exId}, caused by ${err.message}`);
-                    logger.error(error);
-                    reject(error);
-                });
-        }));
-    }
-
-    function readyForAllocation() {
-        return _availableWorkers() >= 2;
-    }
-
-    function clusterAvailable() {}
-
-    function stopExecution(exId, timeout, exclude) {
-        // we are allowing stopExecution to be non blocking, we block at api level
-        const excludeNode = exclude || null;
-        pendingWorkerRequests.remove(exId, 'ex_id');
-        const sendingMessage = { message: 'cluster:execution:stop' };
-        if (timeout) {
-            sendingMessage.timeout = timeout;
-        }
-        return _notifyNodesWithExecution(exId, sendingMessage, excludeNode);
-    }
-
-    async function shutdown() {
-        clearInterval(clusterStateInterval);
-
-        logger.info('native clustering shutting down');
-        if (messaging) {
-            await messaging.shutdown();
-        } else {
-            await pDelay(100);
-        }
-    }
-
-    async function initialize() {
-        logger.info('native clustering initializing');
-        exStore = context.stores.execution;
-        if (!exStore) {
-            throw new Error('Missing required stores');
-        }
-        const server = clusterMasterServer.httpServer;
-        await messaging.listen({ server });
-
-        clusterStateInterval = setInterval(() => {
-            logger.trace('cluster_master requesting state update for all nodes');
-            messaging.broadcast('cluster:node:state');
-        }, nodeStateInterval);
-    }
-
-    return {
-        getClusterState,
-        allocateWorkers,
-        allocateSlicer,
-        initialize,
-        shutdown,
-        stopExecution,
-        removeWorkers,
-        addWorkers,
-        setWorkers,
-        readyForAllocation,
-        clusterAvailable
-    };
-};
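The removed file above exports a factory that returns the native cluster service API (getClusterState, allocateSlicer, allocateWorkers, addWorkers, setWorkers, removeWorkers, stopExecution, readyForAllocation, initialize, shutdown). A minimal sketch of how a caller might drive that interface against the 0.87.0 layout follows; the context, clusterMasterServer and execution values are hypothetical stand-ins, and this is not the actual cluster_master.js wiring.

'use strict';

// Illustrative only: exercises the API returned by the removed nativeClustering
// factory, using the 0.87.0 file layout shown in the file list above.
const nativeClustering = require('./lib/cluster/services/cluster/backends/native');

async function runExample(context, clusterMasterServer, execution) {
    const clusterService = nativeClustering(context, clusterMasterServer);

    // connects the messaging layer and starts the periodic node-state broadcast
    await clusterService.initialize();

    // readyForAllocation() requires at least two free workers
    // (one execution_controller plus one worker)
    if (!clusterService.readyForAllocation()) {
        throw new Error('cluster does not have enough available workers');
    }

    // allocate the execution_controller first, then workers for it
    // (the worker count of 5 is arbitrary for this sketch)
    const provisioned = await clusterService.allocateSlicer(execution);
    await clusterService.allocateWorkers(provisioned, 5);

    return clusterService.getClusterState();
}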
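allocateSlicer retries _createSlicer with a short pDelay(100) between attempts, up to the configured slicer_allocation_attempts, collecting failing node ids in errorNodes so later attempts can skip them. The same bounded retry-with-delay pattern in isolation, using only pDelay from @terascope/utils, might look like this (the retryWithDelay helper and its parameters are illustrative, not part of teraslice):

'use strict';

const { pDelay } = require('@terascope/utils');

// Generic bounded retry with a fixed delay between attempts, mirroring the
// pattern allocateSlicer used above. `task`, `attempts` and `delayMs` are
// illustrative names, not teraslice APIs.
async function retryWithDelay(task, attempts, delayMs = 100) {
    let lastErr;
    for (let i = 0; i < attempts; i += 1) {
        try {
            return await task(i);
        } catch (err) {
            lastErr = err;
            await pDelay(delayMs);
        }
    }
    throw lastErr;
}

// usage sketch: retry a flaky async call up to 3 times
// retryWithDelay(() => createController(), 3).catch((err) => console.error(err));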
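Pending worker requests are drained through a lodash _.debounce wrapper (500 ms, trailing only) attached to the 'cluster:available_workers' event, so a burst of node-state updates collapses into a single scheduling pass. A small, self-contained illustration of that trailing-debounce behaviour is below; the event bus and queue are stand-ins, only the debounce semantics match the removed code.

'use strict';

const _ = require('lodash');
const { EventEmitter } = require('events');

// Stand-in event bus and queue; only the debounce behaviour matters here.
const events = new EventEmitter();
const pending = ['worker-request-1', 'worker-request-2'];

const schedulePending = _.debounce(() => {
    // runs once, 500ms after the last 'cluster:available_workers' event
    while (pending.length) {
        console.log('dispatching', pending.shift());
    }
}, 500, { leading: false, trailing: true });

events.on('cluster:available_workers', schedulePending);

// a burst of events collapses into a single trailing invocation
for (let i = 0; i < 10; i += 1) {
    events.emit('cluster:available_workers');
}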
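Both allocateWorkers and removeWorkers accumulate a per-node worker tally through the small _makeDispatch helper before sending a single cluster:workers:create or cluster:workers:remove message per node. Restated standalone (with hypothetical node ids), the helper reduces to:

'use strict';

// Standalone restatement of the per-node tally used by allocateWorkers/removeWorkers.
function makeDispatch() {
    const dispatch = {};
    return {
        set(nodeId, numOfWorkers) {
            dispatch[nodeId] = (dispatch[nodeId] || 0) + numOfWorkers;
        },
        getDispatch() {
            return dispatch;
        }
    };
}

// usage sketch: round-robin one worker at a time across two nodes
const dispatch = makeDispatch();
dispatch.set('node-1', 1);
dispatch.set('node-2', 1);
dispatch.set('node-1', 1);
console.log(dispatch.getDispatch()); // { 'node-1': 2, 'node-2': 1 }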