teraslice 3.2.1 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/LICENSE +202 -0
  2. package/package.json +26 -30
  3. package/dist/src/interfaces.js +0 -12
  4. package/dist/src/lib/cluster/cluster_master.js +0 -246
  5. package/dist/src/lib/cluster/node_master.js +0 -355
  6. package/dist/src/lib/cluster/services/api.js +0 -663
  7. package/dist/src/lib/cluster/services/assets.js +0 -224
  8. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js +0 -192
  9. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/interfaces.js +0 -2
  10. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.js +0 -419
  11. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sDeploymentResource.js +0 -60
  12. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sJobResource.js +0 -55
  13. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sResource.js +0 -357
  14. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sServiceResource.js +0 -37
  15. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sState.js +0 -60
  16. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/utils.js +0 -170
  17. package/dist/src/lib/cluster/services/cluster/backends/native/dispatch.js +0 -13
  18. package/dist/src/lib/cluster/services/cluster/backends/native/index.js +0 -526
  19. package/dist/src/lib/cluster/services/cluster/backends/native/messaging.js +0 -548
  20. package/dist/src/lib/cluster/services/cluster/backends/state-utils.js +0 -26
  21. package/dist/src/lib/cluster/services/cluster/index.js +0 -13
  22. package/dist/src/lib/cluster/services/execution.js +0 -435
  23. package/dist/src/lib/cluster/services/index.js +0 -6
  24. package/dist/src/lib/cluster/services/interfaces.js +0 -2
  25. package/dist/src/lib/cluster/services/jobs.js +0 -458
  26. package/dist/src/lib/config/default-sysconfig.js +0 -25
  27. package/dist/src/lib/config/index.js +0 -20
  28. package/dist/src/lib/config/schemas/system.js +0 -360
  29. package/dist/src/lib/storage/analytics.js +0 -86
  30. package/dist/src/lib/storage/assets.js +0 -401
  31. package/dist/src/lib/storage/backends/elasticsearch_store.js +0 -496
  32. package/dist/src/lib/storage/backends/mappings/analytics.js +0 -20
  33. package/dist/src/lib/storage/backends/mappings/asset.js +0 -32
  34. package/dist/src/lib/storage/backends/mappings/ex.js +0 -53
  35. package/dist/src/lib/storage/backends/mappings/job.js +0 -42
  36. package/dist/src/lib/storage/backends/mappings/state.js +0 -16
  37. package/dist/src/lib/storage/backends/s3_store.js +0 -237
  38. package/dist/src/lib/storage/execution.js +0 -302
  39. package/dist/src/lib/storage/index.js +0 -7
  40. package/dist/src/lib/storage/jobs.js +0 -81
  41. package/dist/src/lib/storage/state.js +0 -254
  42. package/dist/src/lib/utils/api_utils.js +0 -128
  43. package/dist/src/lib/utils/asset_utils.js +0 -94
  44. package/dist/src/lib/utils/date_utils.js +0 -52
  45. package/dist/src/lib/utils/encoding_utils.js +0 -27
  46. package/dist/src/lib/utils/events.js +0 -4
  47. package/dist/src/lib/utils/file_utils.js +0 -124
  48. package/dist/src/lib/utils/id_utils.js +0 -15
  49. package/dist/src/lib/utils/port_utils.js +0 -32
  50. package/dist/src/lib/workers/assets/index.js +0 -3
  51. package/dist/src/lib/workers/assets/loader-executable.js +0 -40
  52. package/dist/src/lib/workers/assets/loader.js +0 -73
  53. package/dist/src/lib/workers/assets/spawn.js +0 -55
  54. package/dist/src/lib/workers/context/execution-context.js +0 -12
  55. package/dist/src/lib/workers/context/terafoundation-context.js +0 -8
  56. package/dist/src/lib/workers/execution-controller/execution-analytics.js +0 -188
  57. package/dist/src/lib/workers/execution-controller/index.js +0 -1024
  58. package/dist/src/lib/workers/execution-controller/recovery.js +0 -151
  59. package/dist/src/lib/workers/execution-controller/scheduler.js +0 -390
  60. package/dist/src/lib/workers/execution-controller/slice-analytics.js +0 -96
  61. package/dist/src/lib/workers/helpers/job.js +0 -80
  62. package/dist/src/lib/workers/helpers/op-analytics.js +0 -22
  63. package/dist/src/lib/workers/helpers/terafoundation.js +0 -34
  64. package/dist/src/lib/workers/helpers/worker-shutdown.js +0 -147
  65. package/dist/src/lib/workers/metrics/index.js +0 -108
  66. package/dist/src/lib/workers/worker/index.js +0 -378
  67. package/dist/src/lib/workers/worker/slice.js +0 -122
  68. package/dist/test/config/schemas/system_schema-spec.js +0 -26
  69. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.js +0 -458
  70. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sResource-v2-spec.js +0 -818
  71. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-multicluster-v2-spec.js +0 -67
  72. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-v2-spec.js +0 -84
  73. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/utils-v2-spec.js +0 -320
  74. package/dist/test/lib/cluster/services/cluster/backends/state-utils-spec.js +0 -37
  75. package/dist/test/node_master-spec.js +0 -194
  76. package/dist/test/services/api-spec.js +0 -79
  77. package/dist/test/services/assets-spec.js +0 -158
  78. package/dist/test/services/messaging-spec.js +0 -440
  79. package/dist/test/storage/assets_storage-spec.js +0 -95
  80. package/dist/test/storage/s3_store-spec.js +0 -149
  81. package/dist/test/test.config.js +0 -23
  82. package/dist/test/test.setup.js +0 -6
  83. package/dist/test/utils/api_utils-spec.js +0 -25
  84. package/dist/test/utils/asset_utils-spec.js +0 -141
  85. package/dist/test/utils/elastic_utils-spec.js +0 -25
  86. package/dist/test/workers/execution-controller/execution-controller-spec.js +0 -371
  87. package/dist/test/workers/execution-controller/execution-special-test-cases-spec.js +0 -519
  88. package/dist/test/workers/execution-controller/execution-test-cases-spec.js +0 -343
  89. package/dist/test/workers/execution-controller/recovery-spec.js +0 -160
  90. package/dist/test/workers/execution-controller/scheduler-spec.js +0 -249
  91. package/dist/test/workers/execution-controller/slice-analytics-spec.js +0 -121
  92. package/dist/test/workers/fixtures/ops/example-op/processor.js +0 -20
  93. package/dist/test/workers/fixtures/ops/example-op/schema.js +0 -19
  94. package/dist/test/workers/fixtures/ops/example-reader/fetcher.js +0 -20
  95. package/dist/test/workers/fixtures/ops/example-reader/schema.js +0 -41
  96. package/dist/test/workers/fixtures/ops/example-reader/slicer.js +0 -37
  97. package/dist/test/workers/fixtures/ops/new-op/processor.js +0 -29
  98. package/dist/test/workers/fixtures/ops/new-op/schema.js +0 -18
  99. package/dist/test/workers/fixtures/ops/new-reader/fetcher.js +0 -19
  100. package/dist/test/workers/fixtures/ops/new-reader/schema.js +0 -23
  101. package/dist/test/workers/fixtures/ops/new-reader/slicer.js +0 -13
  102. package/dist/test/workers/helpers/configs.js +0 -128
  103. package/dist/test/workers/helpers/execution-controller-helper.js +0 -49
  104. package/dist/test/workers/helpers/index.js +0 -5
  105. package/dist/test/workers/helpers/test-context.js +0 -210
  106. package/dist/test/workers/helpers/zip-directory.js +0 -25
  107. package/dist/test/workers/worker/slice-spec.js +0 -333
  108. package/dist/test/workers/worker/worker-spec.js +0 -356
@@ -1,526 +0,0 @@
1
- /* eslint-disable @typescript-eslint/prefer-for-of */
2
- import { TSError, getFullErrorStack, debounce, pDelay, cloneDeep, pMap, orderBy, isInteger, get, Queue } from '@terascope/core-utils';
3
- import { Dispatch } from './dispatch.js';
4
- import { makeLogger } from '../../../../../workers/helpers/terafoundation.js';
5
- import { findWorkersByExecutionID } from '../state-utils.js';
6
- import { Messaging } from './messaging.js';
7
- export class NativeClustering {
8
- context;
9
- logger;
10
- events;
11
- executionStore;
12
- pendingWorkerRequests = new Queue();
13
- nodeStateInterval;
14
- slicerAllocationAttempts;
15
- clusterState = {};
16
- clusterStateInterval;
17
- messaging;
18
- droppedNodes = {};
19
- clusterMasterServer;
20
- constructor(context, clusterMasterServer) {
21
- this.context = context;
22
- this.events = context.apis.foundation.getSystemEvents();
23
- this.logger = makeLogger(context, 'native_cluster_service');
24
- const nodeDisconnectTimeout = context.sysconfig.teraslice.node_disconnect_timeout;
25
- this.nodeStateInterval = context.sysconfig.teraslice.node_state_interval;
26
- this.slicerAllocationAttempts = context.sysconfig.teraslice.slicer_allocation_attempts;
27
- this.messaging = new Messaging(context, this.logger);
28
- this.clusterMasterServer = clusterMasterServer;
29
- // temporary holding spot used to attach nodes that are non responsive or
30
- // disconnect before final cleanup
31
- this.messaging.register({
32
- event: 'node:online',
33
- identifier: 'node_id',
34
- callback: (data, nodeId) => {
35
- this.logger.info(`node ${nodeId} has connected`);
36
- // if a reconnect happens stop timer
37
- if (this.droppedNodes[nodeId]) {
38
- clearTimeout(this.droppedNodes[nodeId]);
39
- delete this.droppedNodes[nodeId];
40
- }
41
- this.logger.trace(`node ${nodeId} has state:`, data.payload);
42
- this.clusterState[nodeId] = data.payload;
43
- // if new node comes online, check if jobs need more workers
44
- this.events.emit('cluster:available_workers');
45
- }
46
- });
47
- this.messaging.register({
48
- event: 'node:state',
49
- callback: (stateMsg) => {
50
- const data = stateMsg.payload;
51
- this.clusterState[data.node_id] = data;
52
- this.logger.trace(`node ${data.node_id} state is being updated`, data);
53
- // check to see if we can provision any additional workers
54
- this.events.emit('cluster:available_workers');
55
- }
56
- });
57
- this.messaging.register({
58
- event: 'network:error',
59
- callback: (err) => this.logger.error(err, 'cluster_master had an error with one of its connections')
60
- });
61
- this.messaging.register({
62
- event: 'network:disconnect',
63
- identifier: 'node_id',
64
- callback: (msg, nodeId) => {
65
- if (!this.clusterState[nodeId])
66
- return;
67
- if (this.clusterState[nodeId].active.length === 0) {
68
- this.logger.warn(`node ${nodeId} has disconnected`);
69
- delete this.clusterState[nodeId];
70
- }
71
- else {
72
- this.clusterState[nodeId].state = 'disconnected';
73
- const timer = setTimeout(async () => {
74
- await this._cleanUpNode(nodeId);
75
- }, nodeDisconnectTimeout);
76
- this.droppedNodes[nodeId] = timer;
77
- }
78
- }
79
- });
80
- // TODO: should this be in initialize?
81
- const schedulePendingRequests = debounce(() => {
82
- if (this.pendingWorkerRequests.size() && this._availableWorkers(false, true) >= 1) {
83
- const requestedWorker = this.pendingWorkerRequests.dequeue();
84
- const job = JSON.parse(requestedWorker.job);
85
- this.allocateWorkers(job, requestedWorker.workers)
86
- .catch((err) => {
87
- const error = new TSError(err, {
88
- reason: 'Error processing pending requests'
89
- });
90
- this.logger.error(error);
91
- });
92
- }
93
- }, 500, { leading: false, trailing: true });
94
- this.events.on('cluster:available_workers', schedulePendingRequests);
95
- }
96
- async _cleanUpNode(nodeId) {
97
- // check workers and slicers
98
- const node = this._checkNode(this.clusterState[nodeId]);
99
- // if disconnected node had a slicer, we stop the execution of each slicer on it
100
- // and mark it as failed
101
- if (node.hasSlicer) {
102
- await pMap(Object.values(node.slicerExecutions), async (exId) => {
103
- const errMsg = `node ${nodeId} has been disconnected from cluster_master past the allowed timeout, it has an active slicer for execution: ${exId} which will be marked as terminated and shut down`;
104
- this.logger.error(errMsg);
105
- const metaData = this.executionStore.executionMetaData(null, errMsg);
106
- this.pendingWorkerRequests.remove(exId, 'ex_id');
107
- try {
108
- await this.executionStore.setStatus(exId, 'terminated', metaData);
109
- }
110
- catch (err) {
111
- this.logger.error(err, `failure to set execution ${exId} status to terminated`);
112
- }
113
- finally {
114
- this.messaging.broadcast('cluster:execution:stop', { ex_id: exId });
115
- }
116
- });
117
- }
118
- // for any other worker not part of what is being shutdown, we attempt to reallocate
119
- await pMap(Object.keys(node.workerExecutions), async (exId) => {
120
- // looking for unique ex_id's not in slicerJobID
121
- if (!node.slicerExecutions[exId]) {
122
- const activeWorkers = this.clusterState[nodeId].active;
123
- const numOfWorkers = activeWorkers.filter((worker) => worker.ex_id === exId).length;
124
- try {
125
- const execution = await this.executionStore.getActiveExecution(exId);
126
- this.addWorkers(execution, numOfWorkers);
127
- }
128
- catch (err) {
129
- this.logger.error(err, `failure to add workers to execution ${exId}`);
130
- }
131
- }
132
- });
133
- // cleanup key so we don't get ever growing obj
134
- delete this.droppedNodes[nodeId];
135
- delete this.clusterState[nodeId];
136
- }
137
- async initialize() {
138
- this.logger.info('native clustering initializing');
139
- this.executionStore = this.context.stores.executionStorage;
140
- if (!this.executionStore) {
141
- throw new Error('Missing required stores');
142
- }
143
- const server = this.clusterMasterServer.httpServer;
144
- await this.messaging.listen({ server });
145
- this.clusterStateInterval = setInterval(() => {
146
- this.logger.trace('cluster_master requesting state update for all nodes');
147
- this.messaging.broadcast('cluster:node:state');
148
- }, this.nodeStateInterval);
149
- }
150
- getClusterState() {
151
- return cloneDeep(this.clusterState);
152
- }
153
- _checkNode(node) {
154
- const obj = {
155
- hasSlicer: false,
156
- numOfSlicers: 0,
157
- slicerExecutions: {},
158
- workerExecutions: {},
159
- numOfWorkers: 0,
160
- available: node.available
161
- };
162
- return node.active.reduce((prev, curr) => {
163
- if (curr.assignment === 'execution_controller') {
164
- prev.hasSlicer = true;
165
- prev.numOfSlicers += 1;
166
- prev.slicerExecutions[curr.ex_id] = curr.ex_id;
167
- }
168
- if (curr.assignment === 'worker') {
169
- prev.numOfWorkers += 1;
170
- // if not resgistered, set it to one, if so then increment it
171
- if (!prev.workerExecutions[curr.ex_id]) {
172
- prev.workerExecutions[curr.ex_id] = 1;
173
- }
174
- else {
175
- prev.workerExecutions[curr.ex_id] += 1;
176
- }
177
- }
178
- return prev;
179
- }, obj);
180
- }
181
- _findNodeForSlicer(stateArray, errorNodes) {
182
- let slicerNode = null;
183
- for (let i = 0; i < stateArray.length; i += 1) {
184
- if (stateArray[i].state === 'connected' && stateArray[i].available > 0 && !errorNodes[stateArray[i].node_id]) {
185
- const node = this._checkNode(stateArray[i]);
186
- if (!node.hasSlicer) {
187
- slicerNode = stateArray[i].node_id;
188
- break;
189
- }
190
- }
191
- }
192
- // if all nodes have a slicer
193
- if (!slicerNode) {
194
- // list is already sorted by num available since stateArray is sorted
195
- slicerNode = stateArray[0].node_id;
196
- }
197
- return slicerNode;
198
- }
199
- _findNodesForExecution(exId, slicerOnly) {
200
- const nodes = [];
201
- for (const [, node] of Object.entries(this.clusterState)) {
202
- if (node.state !== 'disconnected') {
203
- const hasJob = node.active.filter((worker) => {
204
- if (slicerOnly) {
205
- return worker.ex_id === exId && worker.assignment === 'execution_controller';
206
- }
207
- return worker.ex_id === exId;
208
- });
209
- if (hasJob.length >= 1) {
210
- nodes.push({
211
- node_id: node.node_id,
212
- ex_id: exId,
213
- hostname: node.hostname,
214
- workers: hasJob
215
- });
216
- }
217
- }
218
- }
219
- return nodes;
220
- }
221
- _availableWorkers(all, forceCheck) {
222
- let num = 0;
223
- // determine which key to search for in cluster state
224
- if (this.pendingWorkerRequests.size() === 0 || forceCheck) {
225
- const key = all ? 'total' : 'available';
226
- for (const [, node] of Object.entries(this.clusterState)) {
227
- if (node.state === 'connected') {
228
- num += node[key];
229
- }
230
- }
231
- }
232
- return num;
233
- }
234
- _findPort(nodeId) {
235
- return this.messaging.send({
236
- to: 'node_master',
237
- address: nodeId,
238
- message: 'cluster:node:get_port',
239
- response: true
240
- });
241
- }
242
- // designed to allocate additional workers, not any future slicers
243
- async allocateWorkers(execution, numOfWorkersRequested) {
244
- const exId = execution.ex_id;
245
- const jobId = execution.job_id;
246
- const jobStr = JSON.stringify(execution);
247
- const sortedNodes = orderBy(this.clusterState, 'available', 'desc');
248
- let workersRequested = numOfWorkersRequested;
249
- let availWorkers = this._availableWorkers(false, true);
250
- const dispatch = new Dispatch();
251
- while (workersRequested > 0 && availWorkers > 0) {
252
- for (let i = 0; i < sortedNodes.length; i += 1) {
253
- // each iteration check if it can allocate
254
- if (workersRequested > 0 && availWorkers > 0) {
255
- if (sortedNodes[i].available >= 1) {
256
- dispatch.set(sortedNodes[i].node_id, 1);
257
- availWorkers -= 1;
258
- workersRequested -= 1;
259
- }
260
- }
261
- else {
262
- break;
263
- }
264
- }
265
- }
266
- // if left over worker requests, enqueue them, queue works based off of id
267
- // so it redundantly references ex_id
268
- const workerData = {
269
- job: jobStr,
270
- id: exId,
271
- ex_id: exId,
272
- job_id: jobId,
273
- workers: 1,
274
- assignment: 'worker'
275
- };
276
- while (workersRequested > 0) {
277
- this.logger.trace(`adding worker to pending queue for ex: ${exId}`);
278
- this.pendingWorkerRequests.enqueue(workerData);
279
- workersRequested -= 1;
280
- }
281
- const results = [];
282
- for (const [nodeId, workerCount] of Object.entries(dispatch.getDispatch())) {
283
- const requestedWorkersData = {
284
- job: jobStr,
285
- id: exId,
286
- ex_id: exId,
287
- job_id: jobId,
288
- workers: workerCount,
289
- assignment: 'worker'
290
- };
291
- const createRequest = this.messaging.send({
292
- to: 'node_master',
293
- address: nodeId,
294
- message: 'cluster:workers:create',
295
- payload: requestedWorkersData,
296
- response: true
297
- }).then((msg) => {
298
- const createdWorkers = get(msg, 'payload.createdWorkers');
299
- if (!isInteger(createdWorkers)) {
300
- this.logger.error(`malformed response from create workers request to node ${nodeId}`, msg);
301
- return;
302
- }
303
- if (createdWorkers < workerCount) {
304
- this.logger.warn(`node ${nodeId} was only able to allocate ${createdWorkers} the request worker count of ${workerCount}, enqueing the remainder`);
305
- const newWorkersRequest = cloneDeep(requestedWorkersData);
306
- newWorkersRequest.workers = workerCount - createdWorkers;
307
- this.pendingWorkerRequests.enqueue(newWorkersRequest);
308
- }
309
- else {
310
- this.logger.debug(`node ${nodeId} allocated ${createdWorkers}`);
311
- }
312
- })
313
- .catch((err) => {
314
- this.logger.error(err, `An error has occurred in allocating : ${workerCount} workers to node : ${nodeId}, the worker request has been enqueued`);
315
- this.pendingWorkerRequests.enqueue(requestedWorkersData);
316
- });
317
- results.push(createRequest);
318
- }
319
- // this will resolve successfully if one worker was actually allocated
320
- return Promise.all(results);
321
- }
322
- async _createSlicer(ex, errorNodes) {
323
- const execution = cloneDeep(ex);
324
- const sortedNodes = orderBy(this.clusterState, 'available', 'desc');
325
- const slicerNodeID = this._findNodeForSlicer(sortedNodes, errorNodes);
326
- // need to mutate job so that workers will know the specific port and
327
- // hostname of the created slicer
328
- const portObj = await this._findPort(slicerNodeID);
329
- execution.slicer_port = portObj.port;
330
- execution.slicer_hostname = this.clusterState[slicerNodeID].hostname;
331
- this.logger.debug(`node ${this.clusterState[slicerNodeID].hostname} has been elected for slicer, listening on port: ${portObj.port}`);
332
- const exId = execution.ex_id;
333
- const jobId = execution.job_id;
334
- const jobStr = JSON.stringify(execution);
335
- const data = {
336
- job: jobStr,
337
- ex_id: exId,
338
- job_id: jobId,
339
- workers: 1,
340
- slicer_port: portObj.port,
341
- node_id: slicerNodeID,
342
- assignment: 'execution_controller'
343
- };
344
- try {
345
- await this.messaging.send({
346
- to: 'node_master',
347
- address: slicerNodeID,
348
- message: 'cluster:execution_controller:create',
349
- payload: data,
350
- response: true
351
- });
352
- return execution;
353
- }
354
- catch (err) {
355
- const error = new TSError(err, {
356
- reason: `failed to allocate execution_controller to ${slicerNodeID}`
357
- });
358
- this.logger.error(error);
359
- errorNodes[slicerNodeID] = getFullErrorStack(error);
360
- throw err;
361
- }
362
- }
363
- async allocateSlicer(ex) {
364
- let retryCount = 0;
365
- const errorNodes = {};
366
- const _allocateSlicer = async () => {
367
- try {
368
- return await this._createSlicer(ex, errorNodes);
369
- }
370
- catch (err) {
371
- retryCount += 1;
372
- if (retryCount >= this.slicerAllocationAttempts) {
373
- throw new Error(`Failed to allocate execution_controller to nodes: ${JSON.stringify(errorNodes)}`);
374
- }
375
- else {
376
- await pDelay(100);
377
- return _allocateSlicer();
378
- }
379
- }
380
- };
381
- return _allocateSlicer();
382
- }
383
- addWorkers(execution, workerNum) {
384
- const workerData = {
385
- job: JSON.stringify(execution),
386
- id: execution.ex_id,
387
- ex_id: execution.ex_id,
388
- job_id: execution.job_id,
389
- workers: workerNum,
390
- assignment: 'worker'
391
- };
392
- this.pendingWorkerRequests.enqueue(workerData);
393
- return { action: 'enqueued', ex_id: execution.ex_id, workerNum };
394
- }
395
- setWorkers(execution, workerNum) {
396
- const totalWorker = findWorkersByExecutionID(this.clusterState, execution.ex_id).length;
397
- if (totalWorker > workerNum) {
398
- const removedWorkersCount = totalWorker - workerNum;
399
- return this.removeWorkers(execution.ex_id, removedWorkersCount);
400
- }
401
- if (totalWorker < workerNum) {
402
- const addWorkersCount = workerNum - totalWorker;
403
- return this.addWorkers(execution, addWorkersCount);
404
- }
405
- // if they are equal then no work needs to be done
406
- return { action: 'set', ex_id: execution.ex_id, workerNum };
407
- }
408
- removeWorkers(exId, workerNum) {
409
- const dispatch = new Dispatch();
410
- const workers = findWorkersByExecutionID(this.clusterState, exId);
411
- let workerCount = workerNum;
412
- const workersData = workers.reduce((prev, curr) => {
413
- if (!prev[curr.node_id]) {
414
- prev[curr.node_id] = 1;
415
- }
416
- else {
417
- prev[curr.node_id] += 1;
418
- }
419
- prev._total += 1;
420
- return prev;
421
- }, { _total: 0 });
422
- if (workerNum >= workersData._total) {
423
- const errMsg = `workers to be removed: ${workerNum} cannot be >= to current workers: ${workersData._total}`;
424
- const error = new TSError(errMsg, {
425
- statusCode: 400,
426
- });
427
- this.logger.error(error);
428
- return Promise.reject(error);
429
- }
430
- while (workerCount) {
431
- for (const [key] of Object.entries(workersData)) {
432
- if (key !== '_total') {
433
- if (workersData[key] >= 1 && workerCount > 0) {
434
- dispatch.set(key, 1);
435
- workersData[key] -= 1;
436
- workerCount -= 1;
437
- }
438
- }
439
- }
440
- }
441
- const nodes = dispatch.getDispatch();
442
- const messagesSent = [];
443
- for (const [key, val] of Object.entries(nodes)) {
444
- messagesSent.push(this.messaging.send({
445
- to: 'node_master',
446
- address: key,
447
- message: 'cluster:workers:remove',
448
- ex_id: exId,
449
- payload: { workers: val },
450
- response: true
451
- }));
452
- }
453
- return Promise.all(messagesSent)
454
- .then(() => ({ action: 'remove', ex_id: exId, workerNum }))
455
- .catch((err) => {
456
- const error = new TSError(err, {
457
- reason: `Error while releasing workers from job ${exId}`
458
- });
459
- this.logger.error(error);
460
- return Promise.reject(error);
461
- });
462
- }
463
- _notifyNodesWithExecution(exId, messageData, excludeNode) {
464
- return new Promise((resolve, reject) => {
465
- let nodes = this._findNodesForExecution(exId);
466
- if (excludeNode) {
467
- nodes = nodes.filter((node) => node.hostname !== excludeNode);
468
- }
469
- else if (messageData.message !== 'cluster:execution:stop' && nodes.length === 0) {
470
- // exclude node is only in regards to a shutdown on the cluster_master, which
471
- // already receives the shutdown notice so it can be empty, in all other
472
- // circumstances if the node list length is zero then reject
473
- const error = new TSError(`Could not find active execution processes for ex_id: ${exId}`);
474
- error.statusCode = 404;
475
- reject(error);
476
- return;
477
- }
478
- const promises = nodes.map((node) => {
479
- const sendingMsg = Object.assign(messageData, {
480
- to: 'node_master',
481
- address: node.node_id,
482
- ex_id: exId,
483
- response: false
484
- });
485
- this.logger.trace(`notifying node ${node.node_id} to stop execution ${exId}`, sendingMsg);
486
- return this.messaging.send(sendingMsg);
487
- });
488
- Promise.all(promises)
489
- .then(() => {
490
- resolve(true);
491
- })
492
- .catch((err) => {
493
- const error = new Error(`Failure to notify node with execution ${exId}, caused by ${err.message}`);
494
- this.logger.error(error);
495
- reject(error);
496
- });
497
- });
498
- }
499
- readyForAllocation() {
500
- return this._availableWorkers() >= 2;
501
- }
502
- clusterAvailable() { }
503
- async stopExecution(exId, options) {
504
- // we are allowing stopExecution to be non blocking, we block at api level
505
- this.pendingWorkerRequests.remove(exId, 'ex_id');
506
- const sendingMessage = { message: 'cluster:execution:stop' };
507
- if (options?.timeout) {
508
- sendingMessage.timeout = options.timeout;
509
- }
510
- return this._notifyNodesWithExecution(exId, sendingMessage, options?.excludeNode);
511
- }
512
- async shutdown() {
513
- clearInterval(this.clusterStateInterval);
514
- this.logger.info('native clustering shutting down');
515
- if (this.messaging) {
516
- await this.messaging.shutdown();
517
- }
518
- else {
519
- await pDelay(100);
520
- }
521
- }
522
- async listResourcesForJobId() {
523
- return [];
524
- }
525
- }
526
- //# sourceMappingURL=index.js.map