teraslice 3.3.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/LICENSE +202 -0
  2. package/package.json +25 -28
  3. package/dist/src/interfaces.js +0 -12
  4. package/dist/src/lib/cluster/cluster_master.js +0 -246
  5. package/dist/src/lib/cluster/node_master.js +0 -355
  6. package/dist/src/lib/cluster/services/api.js +0 -663
  7. package/dist/src/lib/cluster/services/assets.js +0 -224
  8. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js +0 -192
  9. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/interfaces.js +0 -2
  10. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.js +0 -419
  11. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sDeploymentResource.js +0 -60
  12. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sJobResource.js +0 -55
  13. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sResource.js +0 -357
  14. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sServiceResource.js +0 -37
  15. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sState.js +0 -60
  16. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/utils.js +0 -170
  17. package/dist/src/lib/cluster/services/cluster/backends/native/dispatch.js +0 -13
  18. package/dist/src/lib/cluster/services/cluster/backends/native/index.js +0 -526
  19. package/dist/src/lib/cluster/services/cluster/backends/native/messaging.js +0 -548
  20. package/dist/src/lib/cluster/services/cluster/backends/state-utils.js +0 -26
  21. package/dist/src/lib/cluster/services/cluster/index.js +0 -13
  22. package/dist/src/lib/cluster/services/execution.js +0 -435
  23. package/dist/src/lib/cluster/services/index.js +0 -6
  24. package/dist/src/lib/cluster/services/interfaces.js +0 -2
  25. package/dist/src/lib/cluster/services/jobs.js +0 -458
  26. package/dist/src/lib/config/default-sysconfig.js +0 -25
  27. package/dist/src/lib/config/index.js +0 -20
  28. package/dist/src/lib/config/schemas/system.js +0 -360
  29. package/dist/src/lib/storage/analytics.js +0 -86
  30. package/dist/src/lib/storage/assets.js +0 -401
  31. package/dist/src/lib/storage/backends/elasticsearch_store.js +0 -496
  32. package/dist/src/lib/storage/backends/mappings/analytics.js +0 -20
  33. package/dist/src/lib/storage/backends/mappings/asset.js +0 -32
  34. package/dist/src/lib/storage/backends/mappings/ex.js +0 -53
  35. package/dist/src/lib/storage/backends/mappings/job.js +0 -42
  36. package/dist/src/lib/storage/backends/mappings/state.js +0 -16
  37. package/dist/src/lib/storage/backends/s3_store.js +0 -237
  38. package/dist/src/lib/storage/execution.js +0 -302
  39. package/dist/src/lib/storage/index.js +0 -7
  40. package/dist/src/lib/storage/jobs.js +0 -81
  41. package/dist/src/lib/storage/state.js +0 -254
  42. package/dist/src/lib/utils/api_utils.js +0 -128
  43. package/dist/src/lib/utils/asset_utils.js +0 -94
  44. package/dist/src/lib/utils/date_utils.js +0 -52
  45. package/dist/src/lib/utils/encoding_utils.js +0 -27
  46. package/dist/src/lib/utils/events.js +0 -4
  47. package/dist/src/lib/utils/file_utils.js +0 -124
  48. package/dist/src/lib/utils/id_utils.js +0 -15
  49. package/dist/src/lib/utils/port_utils.js +0 -32
  50. package/dist/src/lib/workers/assets/index.js +0 -3
  51. package/dist/src/lib/workers/assets/loader-executable.js +0 -40
  52. package/dist/src/lib/workers/assets/loader.js +0 -73
  53. package/dist/src/lib/workers/assets/spawn.js +0 -55
  54. package/dist/src/lib/workers/context/execution-context.js +0 -12
  55. package/dist/src/lib/workers/context/terafoundation-context.js +0 -8
  56. package/dist/src/lib/workers/execution-controller/execution-analytics.js +0 -188
  57. package/dist/src/lib/workers/execution-controller/index.js +0 -1024
  58. package/dist/src/lib/workers/execution-controller/recovery.js +0 -151
  59. package/dist/src/lib/workers/execution-controller/scheduler.js +0 -390
  60. package/dist/src/lib/workers/execution-controller/slice-analytics.js +0 -96
  61. package/dist/src/lib/workers/helpers/job.js +0 -80
  62. package/dist/src/lib/workers/helpers/op-analytics.js +0 -22
  63. package/dist/src/lib/workers/helpers/terafoundation.js +0 -34
  64. package/dist/src/lib/workers/helpers/worker-shutdown.js +0 -147
  65. package/dist/src/lib/workers/metrics/index.js +0 -108
  66. package/dist/src/lib/workers/worker/index.js +0 -378
  67. package/dist/src/lib/workers/worker/slice.js +0 -122
  68. package/dist/test/config/schemas/system_schema-spec.js +0 -26
  69. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.js +0 -458
  70. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sResource-v2-spec.js +0 -818
  71. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-multicluster-v2-spec.js +0 -67
  72. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-v2-spec.js +0 -84
  73. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/utils-v2-spec.js +0 -320
  74. package/dist/test/lib/cluster/services/cluster/backends/state-utils-spec.js +0 -37
  75. package/dist/test/node_master-spec.js +0 -194
  76. package/dist/test/services/api-spec.js +0 -79
  77. package/dist/test/services/assets-spec.js +0 -158
  78. package/dist/test/services/messaging-spec.js +0 -440
  79. package/dist/test/storage/assets_storage-spec.js +0 -95
  80. package/dist/test/storage/s3_store-spec.js +0 -149
  81. package/dist/test/test.config.js +0 -23
  82. package/dist/test/test.setup.js +0 -6
  83. package/dist/test/utils/api_utils-spec.js +0 -25
  84. package/dist/test/utils/asset_utils-spec.js +0 -141
  85. package/dist/test/utils/elastic_utils-spec.js +0 -25
  86. package/dist/test/workers/execution-controller/execution-controller-spec.js +0 -371
  87. package/dist/test/workers/execution-controller/execution-special-test-cases-spec.js +0 -519
  88. package/dist/test/workers/execution-controller/execution-test-cases-spec.js +0 -343
  89. package/dist/test/workers/execution-controller/recovery-spec.js +0 -160
  90. package/dist/test/workers/execution-controller/scheduler-spec.js +0 -249
  91. package/dist/test/workers/execution-controller/slice-analytics-spec.js +0 -121
  92. package/dist/test/workers/fixtures/ops/example-op/processor.js +0 -20
  93. package/dist/test/workers/fixtures/ops/example-op/schema.js +0 -19
  94. package/dist/test/workers/fixtures/ops/example-reader/fetcher.js +0 -20
  95. package/dist/test/workers/fixtures/ops/example-reader/schema.js +0 -41
  96. package/dist/test/workers/fixtures/ops/example-reader/slicer.js +0 -37
  97. package/dist/test/workers/fixtures/ops/new-op/processor.js +0 -29
  98. package/dist/test/workers/fixtures/ops/new-op/schema.js +0 -18
  99. package/dist/test/workers/fixtures/ops/new-reader/fetcher.js +0 -19
  100. package/dist/test/workers/fixtures/ops/new-reader/schema.js +0 -23
  101. package/dist/test/workers/fixtures/ops/new-reader/slicer.js +0 -13
  102. package/dist/test/workers/helpers/configs.js +0 -128
  103. package/dist/test/workers/helpers/execution-controller-helper.js +0 -49
  104. package/dist/test/workers/helpers/index.js +0 -5
  105. package/dist/test/workers/helpers/test-context.js +0 -210
  106. package/dist/test/workers/helpers/zip-directory.js +0 -25
  107. package/dist/test/workers/worker/slice-spec.js +0 -333
  108. package/dist/test/workers/worker/worker-spec.js +0 -356
@@ -1,355 +0,0 @@
1
- import ms from 'ms';
2
- import { Mutex } from 'async-mutex';
3
- import { getFullErrorStack, debounce, isEmpty, has } from '@terascope/core-utils';
4
- import { makeLogger } from '../workers/helpers/terafoundation.js';
5
- import { Messaging } from './services/cluster/backends/native/messaging.js';
6
- import { spawnAssetLoader } from '../workers/assets/spawn.js';
7
- import { safeEncode } from '../utils/encoding_utils.js';
8
- import { findPort, getPorts } from '../utils/port_utils.js';
9
- import { getPackageJSON } from '../utils/file_utils.js';
10
- const nodeVersion = process.version;
11
- const terasliceVersion = getPackageJSON().version;
12
- export async function nodeMaster(context) {
13
- const logger = makeLogger(context, 'node_master');
14
- const configWorkerLimit = context.sysconfig.teraslice.workers;
15
- const config = context.sysconfig.teraslice;
16
- const events = context.apis.foundation.getSystemEvents();
17
- const mutex = new Mutex();
18
- const messaging = new Messaging(context, logger);
19
- const host = messaging.getHostUrl();
20
- const isShuttingDown = false;
21
- const ports = getPorts(context);
22
- logger.info(`node ${context.sysconfig._nodeName} is attempting to connect to cluster_master: ${host}`);
23
- function sendNodeStateNow() {
24
- if (isShuttingDown)
25
- return;
26
- const state = getNodeState();
27
- messaging.send({
28
- to: 'cluster_master',
29
- message: 'node:state',
30
- node_id: state.node_id,
31
- payload: state
32
- });
33
- }
34
- const sendNodeState = debounce(sendNodeStateNow, 500, { leading: false, trailing: true });
35
- let pendingAllocations = 0;
36
- function allocateWorkers(count, exConfig, fn) {
37
- const startTime = Date.now();
38
- pendingAllocations += count;
39
- sendNodeStateNow();
40
- const locked = mutex.isLocked() ? ' (locked)' : '';
41
- logger.info(`allocating ${count} workers...${locked}`);
42
- return mutex.runExclusive(async () => {
43
- try {
44
- await loadAssetsIfNeeded(exConfig.job, exConfig.ex_id);
45
- }
46
- catch (err) {
47
- logger.error(`Failure to allocated assets for execution ${exConfig.ex_id}`);
48
- throw err;
49
- }
50
- finally {
51
- pendingAllocations -= count;
52
- }
53
- try {
54
- const workers = await fn();
55
- const elapsed = Date.now() - startTime;
56
- if (workers.length === count) {
57
- logger.info(`allocated ${workers.length} workers, took ${ms(elapsed)}`);
58
- }
59
- else {
60
- logger.info(`allocated ${workers.length} out of the requested ${count} workers, took ${ms(elapsed)}`);
61
- }
62
- return workers.length;
63
- }
64
- catch (err) {
65
- logger.error(`Failure to allocate workers for execution ${exConfig.ex_id}`);
66
- throw err;
67
- }
68
- });
69
- }
70
- function canAllocateWorkers(requestedWorkers) {
71
- const numOfCurrentWorkers = Object.keys(context.cluster.workers).length;
72
- // if there is an over allocation, send back rest to be enqueued
73
- if (configWorkerLimit < numOfCurrentWorkers + requestedWorkers) {
74
- return configWorkerLimit - numOfCurrentWorkers > 0;
75
- }
76
- return true;
77
- }
78
- messaging.registerChildOnlineHook(sendNodeState);
79
- messaging.register({
80
- event: 'network:connect',
81
- callback: () => {
82
- logger.info(`node has successfully connected to: ${host}`);
83
- const state = getNodeState();
84
- messaging.send({
85
- to: 'cluster_master', message: 'node:online', node_id: state.node_id, payload: state
86
- });
87
- }
88
- });
89
- messaging.register({
90
- event: 'network:disconnect',
91
- callback: () => logger.info(`node has disconnected from: ${host}`)
92
- });
93
- messaging.register({
94
- event: 'network:error',
95
- callback: (err) => logger.warn(err, `Attempting to connect to cluster_master: ${host}`)
96
- });
97
- messaging.register({
98
- event: 'cluster:execution_controller:create',
99
- // TODO: type this
100
- callback: (createSlicerRequest) => {
101
- const createSlicerMsg = createSlicerRequest.payload;
102
- logger.info(`starting execution_controller for execution ${createSlicerMsg.ex_id}...`);
103
- allocateWorkers(1, createSlicerMsg, async () => {
104
- const controllerContext = {
105
- assignment: 'execution_controller',
106
- NODE_TYPE: 'execution_controller',
107
- EX: safeEncode(createSlicerMsg.job),
108
- job: createSlicerMsg.job,
109
- node_id: context.sysconfig._nodeName,
110
- ex_id: createSlicerMsg.ex_id,
111
- job_id: createSlicerMsg.job_id,
112
- slicer_port: createSlicerMsg.slicer_port
113
- };
114
- logger.trace('starting a execution controller', controllerContext);
115
- return context.apis.foundation.startWorkers(1, controllerContext);
116
- })
117
- .then(() => messaging.respond(createSlicerRequest))
118
- .catch((error) => {
119
- messaging.respond(createSlicerRequest, {
120
- error: getFullErrorStack(error),
121
- });
122
- });
123
- }
124
- });
125
- messaging.register({
126
- event: 'cluster:workers:create',
127
- callback: (createWorkerRequest) => {
128
- const createWorkerMsg = createWorkerRequest.payload;
129
- const requestedWorkers = createWorkerMsg.workers;
130
- logger.info(`starting ${requestedWorkers} workers for execution ${createWorkerMsg.ex_id}...`);
131
- if (!canAllocateWorkers(requestedWorkers)) {
132
- logger.warn(`worker is overallocated, maximum number of workers of ${configWorkerLimit}`);
133
- messaging.respond(createWorkerRequest, {
134
- payload: {
135
- createdWorkers: 0,
136
- }
137
- });
138
- return;
139
- }
140
- allocateWorkers(requestedWorkers, createWorkerMsg, async () => {
141
- let newWorkers = requestedWorkers;
142
- const numOfCurrentWorkers = Object.keys(context.cluster.workers).length;
143
- // if there is an over allocation, send back rest to be enqueued
144
- if (configWorkerLimit < numOfCurrentWorkers + requestedWorkers) {
145
- newWorkers = configWorkerLimit - numOfCurrentWorkers;
146
- logger.warn(`worker allocation request would exceed maximum number of workers of ${configWorkerLimit}`);
147
- logger.warn(`reducing allocation to ${newWorkers} workers.`);
148
- }
149
- let workers = [];
150
- if (newWorkers > 0) {
151
- logger.trace(`starting ${newWorkers} workers`, createWorkerMsg.ex_id);
152
- workers = context.apis.foundation.startWorkers(newWorkers, {
153
- NODE_TYPE: 'worker',
154
- EX: safeEncode(createWorkerMsg.job),
155
- assignment: 'worker',
156
- node_id: context.sysconfig._nodeName,
157
- job: createWorkerMsg.job,
158
- ex_id: createWorkerMsg.ex_id,
159
- job_id: createWorkerMsg.job_id
160
- });
161
- }
162
- return workers;
163
- })
164
- .then((createdWorkers) => messaging.respond(createWorkerRequest, {
165
- payload: {
166
- createdWorkers,
167
- }
168
- }))
169
- .catch(() => messaging.respond(createWorkerRequest, {
170
- payload: {
171
- createdWorkers: 0,
172
- }
173
- }));
174
- }
175
- });
176
- messaging.register({ event: 'cluster:node:state', callback: () => sendNodeState() });
177
- // this fires when entire server will be shutdown
178
- events.once('terafoundation:shutdown', () => {
179
- logger.debug('received shutdown notice from terafoundation');
180
- const filterFn = () => context.cluster.workers;
181
- const isActionCompleteFn = () => isEmpty(getNodeState().active);
182
- shutdownProcesses({}, filterFn, isActionCompleteFn, true);
183
- });
184
- messaging.register({
185
- event: 'cluster:execution:stop',
186
- callback: (networkMsg) => {
187
- const exId = networkMsg.ex_id;
188
- logger.debug(`received cluster execution stop for execution ${exId}`);
189
- const filterFn = () => {
190
- return Object.values(context.cluster.workers)
191
- .filter((worker) => {
192
- return worker.ex_id === exId;
193
- });
194
- };
195
- function actionCompleteFn() {
196
- const children = getNodeState().active;
197
- const workers = children.filter((worker) => worker.ex_id === exId);
198
- logger.debug(`waiting for ${workers.length} to stop for ex: ${exId}`);
199
- return workers.length === 0;
200
- }
201
- shutdownProcesses(networkMsg, filterFn, actionCompleteFn);
202
- }
203
- });
204
- messaging.register({
205
- event: 'cluster:workers:remove',
206
- callback: (networkMsg) => {
207
- const numberToRemove = networkMsg.payload.workers;
208
- const children = getNodeState().active;
209
- const startingWorkerCount = children.filter((worker) => worker.ex_id === networkMsg.ex_id && worker.assignment === 'worker').length;
210
- const filterFn = () => children.filter((worker) => worker.ex_id === networkMsg.ex_id && worker.assignment === 'worker').slice(0, numberToRemove);
211
- function actionCompleteFn() {
212
- const childWorkers = getNodeState().active;
213
- const currentWorkersForJob = childWorkers.filter((worker) => worker.ex_id === networkMsg.ex_id && worker.assignment === 'worker').length;
214
- return currentWorkersForJob + numberToRemove <= startingWorkerCount;
215
- }
216
- shutdownProcesses(networkMsg, filterFn, actionCompleteFn);
217
- }
218
- });
219
- // used to find an open port for slicer
220
- messaging.register({
221
- event: 'cluster:node:get_port',
222
- callback: async (msg) => {
223
- const port = await findPort(ports);
224
- logger.debug(`assigning port ${port} for new job`);
225
- messaging.respond(msg, { port });
226
- }
227
- });
228
- messaging.register({
229
- event: 'cluster:error:terminal',
230
- callback: () => {
231
- logger.error('terminal error in cluster_master, flushing logs and shutting down');
232
- logger.flush()
233
- .then(() => process.exit(0));
234
- }
235
- });
236
- messaging.register({
237
- event: 'child:exit',
238
- callback: () => sendNodeState()
239
- });
240
- function getAssetsFromJob(jobStr) {
241
- const job = typeof jobStr === 'string' ? JSON.parse(jobStr) : jobStr;
242
- return job.assets || [];
243
- }
244
- async function loadAssetsIfNeeded(job, exId) {
245
- const assets = getAssetsFromJob(job);
246
- if (!assets.length)
247
- return;
248
- logger.info(`node ${context.sysconfig._nodeName} is checking assets for job, exId: ${exId}`);
249
- await spawnAssetLoader(assets, context);
250
- }
251
- function shutdownWorkers(signal, filterFn) {
252
- const allWorkersForJob = filterFn();
253
- allWorkersForJob.forEach((worker) => {
254
- const workerID = worker.worker_id || worker.id;
255
- if (has(context.cluster.workers, workerID)) {
256
- const clusterWorker = context.cluster.workers[workerID];
257
- const processId = clusterWorker.process.pid;
258
- if (clusterWorker.isDead())
259
- return;
260
- // if the worker has already been sent a SIGTERM signal it should send a SIGKILL
261
- logger.warn(`sending ${signal} to process ${processId}, assignment: ${worker.assignment}, ex_id: ${worker.ex_id}`);
262
- clusterWorker.kill(signal);
263
- }
264
- });
265
- }
266
- function shutdownProcesses(message, filterFn, isActionCompleteFn, onlySigKill = false) {
267
- const intervalTime = 200;
268
- const needsResponse = message.response && message.to;
269
- // give a little extra time to finish shutting down
270
- let stopTime = config.shutdown_timeout + 3000;
271
- if (!onlySigKill) {
272
- shutdownWorkers('SIGTERM', filterFn);
273
- }
274
- const stop = setInterval(() => {
275
- if (isActionCompleteFn()) {
276
- clearInterval(stop);
277
- if (needsResponse)
278
- messaging.respond(message);
279
- }
280
- if (stopTime <= 0) {
281
- clearInterval(stop);
282
- shutdownWorkers('SIGKILL', filterFn);
283
- if (needsResponse)
284
- messaging.respond(message);
285
- }
286
- stopTime -= intervalTime;
287
- }, intervalTime);
288
- }
289
- function getNodeState() {
290
- const nodeId = context.sysconfig._nodeName;
291
- const state = {
292
- node_id: nodeId,
293
- hostname: context.sysconfig.teraslice.hostname,
294
- pid: process.pid,
295
- node_version: nodeVersion,
296
- teraslice_version: terasliceVersion,
297
- total: context.sysconfig.teraslice.workers,
298
- state: 'connected'
299
- };
300
- const clusterWorkers = context.cluster.workers;
301
- const active = [];
302
- Object.values(clusterWorkers).forEach((worker) => {
303
- const child = {
304
- worker_id: worker.id,
305
- assignment: worker.assignment,
306
- pid: worker.process.pid
307
- };
308
- if (worker.ex_id) {
309
- child.ex_id = worker.ex_id;
310
- }
311
- if (worker.job_id) {
312
- child.job_id = worker.job_id;
313
- }
314
- if (worker.assets) {
315
- child.assets = worker.assets.map((asset) => asset.id);
316
- }
317
- active.push(child);
318
- });
319
- const total = state.total;
320
- state.available = total - active.length - pendingAllocations;
321
- state.active = active;
322
- return state;
323
- }
324
- messaging.listen({
325
- query: {
326
- node_id: context.sysconfig._nodeName
327
- }
328
- });
329
- if (context.sysconfig.teraslice.master) {
330
- logger.debug(`node ${context.sysconfig._nodeName} is creating the cluster_master`);
331
- const [clusterMaster] = context.apis.foundation.startWorkers(1, {
332
- assignment: 'cluster_master',
333
- assets_port: ports.assetsPort,
334
- node_id: context.sysconfig._nodeName
335
- });
336
- clusterMaster.on('exit', (code) => {
337
- if (code !== 0) {
338
- throw Error(`Cluster master has shutdown with exit code ${code}!`);
339
- }
340
- });
341
- logger.debug(`node ${context.sysconfig._nodeName} is creating assets endpoint on port ${ports.assetsPort}`);
342
- const [assetService] = context.apis.foundation.startWorkers(1, {
343
- assignment: 'assets_service',
344
- // key needs to be called port to bypass cluster port sharing
345
- port: ports.assetsPort,
346
- node_id: context.sysconfig._nodeName
347
- });
348
- assetService.on('exit', (code) => {
349
- if (code !== 0) {
350
- throw Error(`Asset Service has shutdown with exit code ${code}!`);
351
- }
352
- });
353
- }
354
- }
355
- //# sourceMappingURL=node_master.js.map