teraslice 2.17.2 → 3.0.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/lib/cluster/services/api.js +5 -8
- package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js +1 -1
- package/dist/src/lib/cluster/services/cluster/index.js +0 -4
- package/dist/src/lib/cluster/services/execution.js +1 -1
- package/dist/src/lib/cluster/services/jobs.js +12 -8
- package/dist/src/lib/config/schemas/system.js +1 -1
- package/dist/src/lib/utils/api_utils.js +1 -41
- package/dist/src/lib/workers/helpers/worker-shutdown.js +4 -19
- package/dist/test/utils/api_utils-spec.js +1 -62
- package/package.json +11 -12
- package/dist/src/lib/cluster/services/cluster/backends/kubernetes/index.js +0 -192
- package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8s.js +0 -481
- package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +0 -414
- package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +0 -59
- package/dist/src/lib/cluster/services/cluster/backends/kubernetes/utils.js +0 -43
- package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.js +0 -316
- package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sResource-spec.js +0 -795
- package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-multicluster-spec.js +0 -67
- package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-spec.js +0 -84
- package/dist/test/lib/cluster/services/cluster/backends/kubernetes/utils-spec.js +0 -132
|
@@ -6,7 +6,7 @@ import { RecoveryCleanupType } from '@terascope/job-components';
|
|
|
6
6
|
import { parseErrorInfo, parseList, logError, TSError, startsWith, pWhile, isKey } from '@terascope/utils';
|
|
7
7
|
import { ExecutionStatusEnum } from '@terascope/types';
|
|
8
8
|
import { makeLogger } from '../../workers/helpers/terafoundation.js';
|
|
9
|
-
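import { makePrometheus, isPrometheusTerasliceRequest, makeTable, sendError, handleTerasliceRequest, getSearchOptions, createJobActiveQuery, addDeletedToQuery } from '../../utils/api_utils.js';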
|
|
9
|
+
import { makeTable, sendError, handleTerasliceRequest, getSearchOptions, createJobActiveQuery, addDeletedToQuery } from '../../utils/api_utils.js';
|
|
10
10
|
import { getPackageJSON } from '../../utils/file_utils.js';
|
|
11
11
|
const terasliceVersion = getPackageJSON().version;
|
|
12
12
|
function validateCleanupType(cleanupType) {
|
|
@@ -391,14 +391,10 @@ export class ApiService {
|
|
|
391
391
|
requestHandler(async () => executionService.getExecutionContext(exId));
|
|
392
392
|
});
|
|
393
393
|
v1routes.get('/cluster/stats', (req, res) => {
|
|
394
|
-
const { name: cluster } = this.context.sysconfig.teraslice;
|
|
395
394
|
const requestHandler = handleTerasliceRequest(req, res, 'Could not get cluster statistics');
|
|
396
395
|
requestHandler(async () => {
|
|
397
|
-
const stats =
|
|
398
|
-
|
|
399
|
-
return makePrometheus(stats, { cluster });
|
|
400
|
-
}
|
|
401
|
-
// for backwards compatability (unsupported for prometheus)
|
|
396
|
+
const stats = executionService.getClusterAnalytics();
|
|
397
|
+
// for backwards compatibility
|
|
402
398
|
// @ts-expect-error
|
|
403
399
|
stats.slicer = stats.controllers;
|
|
404
400
|
return stats;
|
|
@@ -420,7 +416,7 @@ export class ApiService {
|
|
|
420
416
|
if (this.clusterType === 'native') {
|
|
421
417
|
defaults = ['assignment', 'job_id', 'ex_id', 'node_id', 'pid'];
|
|
422
418
|
}
|
|
423
|
-
if (this.clusterType === 'kubernetes' || this.clusterType === 'kubernetesV2') {
|
|
419
|
+
if (this.clusterType === 'kubernetesV2') {
|
|
424
420
|
defaults = ['assignment', 'job_id', 'ex_id', 'node_id', 'pod_name', 'image'];
|
|
425
421
|
}
|
|
426
422
|
const requestHandler = handleTerasliceRequest(req, res, 'Could not get all workers');
|
|
@@ -628,6 +624,7 @@ export class ApiService {
|
|
|
628
624
|
}
|
|
629
625
|
}
|
|
630
626
|
}
|
|
627
|
+
// TODO: removing native clustering will remove the need for any here
|
|
631
628
|
const clusterState = this.clusterService.getClusterState();
|
|
632
629
|
/// Filter out information about kubernetes ex pods
|
|
633
630
|
const filteredExecutions = {};
|
|
@@ -12,7 +12,7 @@ import { K8sDeploymentResource } from './k8sDeploymentResource.js';
|
|
|
12
12
|
Exceptions
|
|
13
13
|
rejected - when a job is rejected prior to scheduling
|
|
14
14
|
failed - when there is an error while the job is running
|
|
15
|
-
aborted - when a job was running at the point when the cluster shutsdown
|
|
15
|
+
aborted - when a job was running at the point when the cluster shuts down
|
|
16
16
|
*/
|
|
17
17
|
export class KubernetesClusterBackendV2 {
|
|
18
18
|
context;
|
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
import { NativeClustering } from './backends/native/index.js';
|
|
2
|
-
import { KubernetesClusterBackend } from './backends/kubernetes/index.js';
|
|
3
2
|
import { KubernetesClusterBackendV2 } from './backends/kubernetesV2/index.js';
|
|
4
3
|
export function makeClustering(context, { clusterMasterServer }) {
|
|
5
4
|
const clusterType = context.sysconfig.teraslice.cluster_manager_type;
|
|
6
5
|
if (clusterType === 'native') {
|
|
7
6
|
return new NativeClustering(context, clusterMasterServer);
|
|
8
7
|
}
|
|
9
|
-
if (clusterType === 'kubernetes') {
|
|
10
|
-
return new KubernetesClusterBackend(context, clusterMasterServer);
|
|
11
|
-
}
|
|
12
8
|
if (clusterType === 'kubernetesV2') {
|
|
13
9
|
return new KubernetesClusterBackendV2(context, clusterMasterServer);
|
|
14
10
|
}
|
|
@@ -386,7 +386,7 @@ export class ExecutionService {
|
|
|
386
386
|
}));
|
|
387
387
|
}
|
|
388
388
|
const clusteringType = this.context.sysconfig.teraslice.cluster_manager_type;
|
|
389
|
-
if (clusteringType === 'kubernetes' || clusteringType === 'kubernetesV2') {
|
|
389
|
+
if (clusteringType === 'kubernetesV2') {
|
|
390
390
|
// Since this condition is only hit in cases where the pods
|
|
391
391
|
// are never scheduled, all this call to stopExecution
|
|
392
392
|
// accomplishes is to delete the k8s resources, which is
|
|
@@ -114,12 +114,14 @@ export class JobsService {
|
|
|
114
114
|
statusCode: 409
|
|
115
115
|
});
|
|
116
116
|
}
|
|
117
|
-
|
|
117
|
+
const currentResources = await this.executionService.listResourcesForJobId(jobId);
|
|
118
118
|
if (currentResources.length > 0) {
|
|
119
|
-
|
|
119
|
+
const flattenedResources = currentResources.flat();
|
|
120
120
|
const exIdsSet = new Set();
|
|
121
|
-
for (const resource of
|
|
122
|
-
|
|
121
|
+
for (const resource of flattenedResources) {
|
|
122
|
+
if (resource.metadata.labels) {
|
|
123
|
+
exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
|
|
124
|
+
}
|
|
123
125
|
}
|
|
124
126
|
const exIdsArr = Array.from(exIdsSet);
|
|
125
127
|
const exIdsString = exIdsArr.join(', ');
|
|
@@ -202,12 +204,14 @@ export class JobsService {
|
|
|
202
204
|
}
|
|
203
205
|
// This will return any orphaned resources in k8s clustering
|
|
204
206
|
// or an empty array in native clustering
|
|
205
|
-
|
|
207
|
+
const currentResources = await this.executionService.listResourcesForJobId(jobId);
|
|
206
208
|
if (currentResources.length > 0) {
|
|
207
|
-
|
|
209
|
+
const flattenedResources = currentResources.flat();
|
|
208
210
|
const exIdsSet = new Set();
|
|
209
|
-
for (const resource of
|
|
210
|
-
|
|
211
|
+
for (const resource of flattenedResources) {
|
|
212
|
+
if (resource.metadata.labels) {
|
|
213
|
+
exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
|
|
214
|
+
}
|
|
211
215
|
}
|
|
212
216
|
const exIdsArr = Array.from(exIdsSet);
|
|
213
217
|
const exIdsString = exIdsArr.join(', ');
|
|
@@ -209,7 +209,7 @@ export const schema = {
|
|
|
209
209
|
cluster_manager_type: {
|
|
210
210
|
doc: 'determines which cluster system should be used',
|
|
211
211
|
default: 'native',
|
|
212
|
-
format: ['native', 'kubernetes', 'kubernetesV2']
|
|
212
|
+
format: ['native', 'kubernetesV2']
|
|
213
213
|
},
|
|
214
214
|
cpu: {
|
|
215
215
|
doc: 'number of cpus to reserve per teraslice worker in kubernetes',
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import Table from 'easy-table';
|
|
2
|
-
import { parseErrorInfo, parseList, logError, isString, get, toInteger, TSError, isKey } from '@terascope/utils';
|
|
2
|
+
import { parseErrorInfo, parseList, logError, isString, get, toInteger, TSError } from '@terascope/utils';
|
|
3
3
|
export function makeTable(req, defaults, data, mappingFn) {
|
|
4
4
|
const query = fieldsQuery(req.query, defaults);
|
|
5
5
|
let emptyChar = 'N/A';
|
|
@@ -74,46 +74,6 @@ export function sendError(res, code, message, logger) {
|
|
|
74
74
|
message
|
|
75
75
|
});
|
|
76
76
|
}
|
|
77
|
-
// NOTE: This only works for counters, if you're trying to extend this, you
|
|
78
|
-
// should probably switch to using prom-client.
|
|
79
|
-
export function makePrometheus(stats, defaultLabels = {}) {
|
|
80
|
-
const metricMapping = {
|
|
81
|
-
processed: 'teraslice_slices_processed',
|
|
82
|
-
failed: 'teraslice_slices_failed',
|
|
83
|
-
queued: 'teraslice_slices_queued',
|
|
84
|
-
job_duration: '', // this isn't really useful, omitting
|
|
85
|
-
workers_joined: 'teraslice_workers_joined',
|
|
86
|
-
workers_disconnected: 'teraslice_workers_disconnected',
|
|
87
|
-
workers_reconnected: 'teraslice_workers_reconnected'
|
|
88
|
-
};
|
|
89
|
-
let returnString = '';
|
|
90
|
-
Object.entries(stats.controllers).forEach(([key, value]) => {
|
|
91
|
-
if (isKey(metricMapping, key)) {
|
|
92
|
-
const name = metricMapping[key];
|
|
93
|
-
if (name !== '') {
|
|
94
|
-
returnString += `# TYPE ${name} counter\n`;
|
|
95
|
-
const labels = makePrometheusLabels(defaultLabels);
|
|
96
|
-
returnString += `${name}${labels} ${value}\n`;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
});
|
|
100
|
-
return returnString;
|
|
101
|
-
}
|
|
102
|
-
function makePrometheusLabels(defaults = {}) {
|
|
103
|
-
const labels = Object.assign({}, defaults);
|
|
104
|
-
const keys = Object.keys(labels);
|
|
105
|
-
if (!keys.length)
|
|
106
|
-
return '';
|
|
107
|
-
const labelsStr = keys.map((key) => {
|
|
108
|
-
const val = labels[key];
|
|
109
|
-
return `${key}="${val}"`;
|
|
110
|
-
}).join(',');
|
|
111
|
-
return `{${labelsStr}}`;
|
|
112
|
-
}
|
|
113
|
-
export function isPrometheusTerasliceRequest(req) {
|
|
114
|
-
const acceptHeader = get(req, 'headers.accept', '');
|
|
115
|
-
return acceptHeader && acceptHeader.indexOf('application/openmetrics-text;') > -1;
|
|
116
|
-
}
|
|
117
77
|
function parseQueryInt(req, key, defaultVal) {
|
|
118
78
|
const val = req.query[key];
|
|
119
79
|
if (val == null || val === '')
|
|
@@ -27,16 +27,8 @@ export function shutdownHandler(context, shutdownFn) {
|
|
|
27
27
|
|| process.env.NODE_TYPE
|
|
28
28
|
|| process.env.assignment
|
|
29
29
|
|| 'unknown-assignment';
|
|
30
|
-
const clusteringType = get(context, 'sysconfig.teraslice.cluster_manager_type');
|
|
31
|
-
const isK8s = clusteringType === 'kubernetes' || clusteringType === 'kubernetesV2';
|
|
32
30
|
// this is native clustering only
|
|
33
31
|
const isProcessRestart = process.env.process_restart;
|
|
34
|
-
// everything but the k8s execution_controller should not be allowed be allowed to
|
|
35
|
-
// set a non-zero exit code (to avoid being restarted)
|
|
36
|
-
// This is overridden in V2 because it can restart
|
|
37
|
-
const allowNonZeroExitCode = !(isK8s
|
|
38
|
-
&& assignment === 'execution_controller'
|
|
39
|
-
&& context.sysconfig.teraslice.cluster_manager_type === 'kubernetes');
|
|
40
32
|
const api = {
|
|
41
33
|
exiting: false,
|
|
42
34
|
exit
|
|
@@ -68,7 +60,7 @@ export function shutdownHandler(context, shutdownFn) {
|
|
|
68
60
|
return `already shutting down, remaining ${ms(shutdownTimeout - elapsed)}`;
|
|
69
61
|
}
|
|
70
62
|
async function callShutdownFn(event, err) {
|
|
71
|
-
// avoid failing before the
|
|
63
|
+
// avoid failing before the promise is try / catched in pRaceWithTimeout
|
|
72
64
|
await pDelay(100);
|
|
73
65
|
await shutdownFn(event, err);
|
|
74
66
|
}
|
|
@@ -81,7 +73,6 @@ export function shutdownHandler(context, shutdownFn) {
|
|
|
81
73
|
async function exit(event, err) {
|
|
82
74
|
if (api.exiting)
|
|
83
75
|
return;
|
|
84
|
-
/// Potential logic for cluster_master and asset_service
|
|
85
76
|
if (err) {
|
|
86
77
|
if (err.name.includes('Error')) {
|
|
87
78
|
setStatusCode(1);
|
|
@@ -97,15 +88,9 @@ export function shutdownHandler(context, shutdownFn) {
|
|
|
97
88
|
}
|
|
98
89
|
finally {
|
|
99
90
|
await flushLogs();
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
process.exit();
|
|
104
|
-
}
|
|
105
|
-
else {
|
|
106
|
-
logger.info(`${assignment} shutdown took ${ms(Date.now() - startTime)}, exit with zero status code`);
|
|
107
|
-
process.exit(0);
|
|
108
|
-
}
|
|
91
|
+
const code = process.exitCode != null ? process.exitCode : 0;
|
|
92
|
+
logger.info(`${assignment} shutdown took ${ms(Date.now() - startTime)}, exit with ${code} status code`);
|
|
93
|
+
process.exit();
|
|
109
94
|
}
|
|
110
95
|
}
|
|
111
96
|
function setStatusCode(code) {
|
|
@@ -1,66 +1,5 @@
|
|
|
1
|
-
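import { makePrometheus, isPrometheusTerasliceRequest, createJobActiveQuery, addDeletedToQuery } from '../../src/lib/utils/api_utils.js';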
|
|
1
|
+
import { createJobActiveQuery, addDeletedToQuery } from '../../src/lib/utils/api_utils.js';
|
|
2
2
|
describe('apiUtils', () => {
|
|
3
|
-
it('should be able make a prometheus text format', () => {
|
|
4
|
-
const stats = {
|
|
5
|
-
controllers: {
|
|
6
|
-
processed: 1000,
|
|
7
|
-
failed: 10,
|
|
8
|
-
queued: 5,
|
|
9
|
-
job_duration: 10,
|
|
10
|
-
workers_joined: 20,
|
|
11
|
-
workers_disconnected: 30,
|
|
12
|
-
workers_reconnected: 40
|
|
13
|
-
}
|
|
14
|
-
};
|
|
15
|
-
const r = `# TYPE teraslice_slices_processed counter
|
|
16
|
-
teraslice_slices_processed ${stats.controllers.processed}
|
|
17
|
-
# TYPE teraslice_slices_failed counter
|
|
18
|
-
teraslice_slices_failed ${stats.controllers.failed}
|
|
19
|
-
# TYPE teraslice_slices_queued counter
|
|
20
|
-
teraslice_slices_queued ${stats.controllers.queued}
|
|
21
|
-
# TYPE teraslice_workers_joined counter
|
|
22
|
-
teraslice_workers_joined ${stats.controllers.workers_joined}
|
|
23
|
-
# TYPE teraslice_workers_disconnected counter
|
|
24
|
-
teraslice_workers_disconnected ${stats.controllers.workers_disconnected}
|
|
25
|
-
# TYPE teraslice_workers_reconnected counter
|
|
26
|
-
teraslice_workers_reconnected ${stats.controllers.workers_reconnected}
|
|
27
|
-
`;
|
|
28
|
-
expect(makePrometheus(stats)).toEqual(r);
|
|
29
|
-
});
|
|
30
|
-
it('should be able make a prometheus text format with labels', () => {
|
|
31
|
-
const stats = {
|
|
32
|
-
controllers: {
|
|
33
|
-
processed: 1000,
|
|
34
|
-
failed: 10,
|
|
35
|
-
queued: 5,
|
|
36
|
-
job_duration: 10,
|
|
37
|
-
workers_joined: 20,
|
|
38
|
-
workers_disconnected: 30,
|
|
39
|
-
workers_reconnected: 40
|
|
40
|
-
}
|
|
41
|
-
};
|
|
42
|
-
const r = `# TYPE teraslice_slices_processed counter
|
|
43
|
-
teraslice_slices_processed{foo="bar"} ${stats.controllers.processed}
|
|
44
|
-
# TYPE teraslice_slices_failed counter
|
|
45
|
-
teraslice_slices_failed{foo="bar"} ${stats.controllers.failed}
|
|
46
|
-
# TYPE teraslice_slices_queued counter
|
|
47
|
-
teraslice_slices_queued{foo="bar"} ${stats.controllers.queued}
|
|
48
|
-
# TYPE teraslice_workers_joined counter
|
|
49
|
-
teraslice_workers_joined{foo="bar"} ${stats.controllers.workers_joined}
|
|
50
|
-
# TYPE teraslice_workers_disconnected counter
|
|
51
|
-
teraslice_workers_disconnected{foo="bar"} ${stats.controllers.workers_disconnected}
|
|
52
|
-
# TYPE teraslice_workers_reconnected counter
|
|
53
|
-
teraslice_workers_reconnected{foo="bar"} ${stats.controllers.workers_reconnected}
|
|
54
|
-
`;
|
|
55
|
-
expect(makePrometheus(stats, { foo: 'bar' })).toEqual(r);
|
|
56
|
-
});
|
|
57
|
-
it('should be able to detect if a request is prometheus', () => {
|
|
58
|
-
expect(isPrometheusTerasliceRequest({
|
|
59
|
-
headers: {
|
|
60
|
-
accept: 'blah application/openmetrics-text; blah blah'
|
|
61
|
-
}
|
|
62
|
-
})).toBeTruthy();
|
|
63
|
-
});
|
|
64
3
|
it('should be able to create the proper job queries', () => {
|
|
65
4
|
let query;
|
|
66
5
|
query = createJobActiveQuery('true');
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "teraslice",
|
|
3
3
|
"displayName": "Teraslice",
|
|
4
|
-
"version": "2.17.2",
|
|
4
|
+
"version": "3.0.0-dev.0",
|
|
5
5
|
"description": "Distributed computing platform for processing JSON data",
|
|
6
6
|
"homepage": "https://github.com/terascope/teraslice#readme",
|
|
7
7
|
"bugs": {
|
|
@@ -35,16 +35,16 @@
|
|
|
35
35
|
"test:watch": "TEST_ELASTICSEARCH='true' node ../scripts/bin/ts-scripts test --watch ../teraslice --"
|
|
36
36
|
},
|
|
37
37
|
"resolutions": {
|
|
38
|
-
"debug": "~4.4.
|
|
38
|
+
"debug": "~4.4.3",
|
|
39
39
|
"ms": "~2.1.3"
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
42
|
"@kubernetes/client-node": "~1.3.0",
|
|
43
|
-
"@terascope/elasticsearch-api": "~
|
|
44
|
-
"@terascope/job-components": "~
|
|
45
|
-
"@terascope/teraslice-messaging": "~
|
|
46
|
-
"@terascope/types": "~
|
|
47
|
-
"@terascope/utils": "~
|
|
43
|
+
"@terascope/elasticsearch-api": "~5.0.0-dev.0",
|
|
44
|
+
"@terascope/job-components": "~2.0.0-dev.0",
|
|
45
|
+
"@terascope/teraslice-messaging": "~2.0.0-dev.0",
|
|
46
|
+
"@terascope/types": "~2.0.0-dev.0",
|
|
47
|
+
"@terascope/utils": "~2.0.0-dev.0",
|
|
48
48
|
"async-mutex": "~0.5.0",
|
|
49
49
|
"barbe": "~3.0.17",
|
|
50
50
|
"body-parser": "~2.2.0",
|
|
@@ -55,19 +55,18 @@
|
|
|
55
55
|
"fs-extra": "~11.3.1",
|
|
56
56
|
"gc-stats": "1.4.1",
|
|
57
57
|
"get-port": "~7.1.0",
|
|
58
|
-
"got": "~14.4.
|
|
58
|
+
"got": "~14.4.8",
|
|
59
59
|
"ip": "~2.0.1",
|
|
60
|
-
"kubernetes-client": "~9.0.0",
|
|
61
60
|
"ms": "~2.1.3",
|
|
62
61
|
"nanoid": "~5.1.5",
|
|
63
62
|
"semver": "~7.7.2",
|
|
64
63
|
"socket.io": "~1.7.4",
|
|
65
64
|
"socket.io-client": "~1.7.4",
|
|
66
|
-
"terafoundation": "~
|
|
67
|
-
"uuid": "~
|
|
65
|
+
"terafoundation": "~2.0.0-dev.0",
|
|
66
|
+
"uuid": "~13.0.0"
|
|
68
67
|
},
|
|
69
68
|
"devDependencies": {
|
|
70
|
-
"@terascope/opensearch-client": "~
|
|
69
|
+
"@terascope/opensearch-client": "~2.0.0-dev.0",
|
|
71
70
|
"@types/archiver": "~6.0.3",
|
|
72
71
|
"@types/express": "~5.0.3",
|
|
73
72
|
"@types/gc-stats": "~1.4.3",
|
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
import { TSError, logError, get, cloneDeep, pRetry } from '@terascope/utils';
|
|
2
|
-
import { makeLogger } from '../../../../../workers/helpers/terafoundation.js';
|
|
3
|
-
import { K8sResource } from './k8sResource.js';
|
|
4
|
-
import { gen } from './k8sState.js';
|
|
5
|
-
import { K8s } from './k8s.js';
|
|
6
|
-
import { getRetryConfig } from './utils.js';
|
|
7
|
-
/*
|
|
8
|
-
Execution Life Cycle for _status
|
|
9
|
-
pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
|
|
10
|
-
Exceptions
|
|
11
|
-
rejected - when a job is rejected prior to scheduling
|
|
12
|
-
failed - when there is an error while the job is running
|
|
13
|
-
aborted - when a job was running at the point when the cluster shutsdown
|
|
14
|
-
*/
|
|
15
|
-
export class KubernetesClusterBackend {
|
|
16
|
-
context;
|
|
17
|
-
k8s;
|
|
18
|
-
logger;
|
|
19
|
-
clusterStateInterval;
|
|
20
|
-
clusterState = {};
|
|
21
|
-
clusterNameLabel;
|
|
22
|
-
constructor(context, clusterMasterServer) {
|
|
23
|
-
const kubernetesNamespace = get(context, 'sysconfig.teraslice.kubernetes_namespace', 'default');
|
|
24
|
-
const clusterName = get(context, 'sysconfig.teraslice.name');
|
|
25
|
-
this.context = context;
|
|
26
|
-
this.logger = makeLogger(context, 'kubernetes_cluster_service');
|
|
27
|
-
this.clusterNameLabel = clusterName.replace(/[^a-zA-Z0-9_\-.]/g, '_').substring(0, 63);
|
|
28
|
-
this.clusterState = {};
|
|
29
|
-
this.clusterStateInterval = undefined;
|
|
30
|
-
this.k8s = new K8s(this.logger, null, kubernetesNamespace, context.sysconfig.teraslice.kubernetes_api_poll_delay, context.sysconfig.teraslice.shutdown_timeout);
|
|
31
|
-
clusterMasterServer.onClientOnline((exId) => {
|
|
32
|
-
this.logger.info(`execution ${exId} is connected`);
|
|
33
|
-
});
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* getClusterState returns a copy of the clusterState object
|
|
37
|
-
* @return {Object} a copy of the clusterState object
|
|
38
|
-
*/
|
|
39
|
-
getClusterState() {
|
|
40
|
-
return cloneDeep(this.clusterState);
|
|
41
|
-
}
|
|
42
|
-
/**
|
|
43
|
-
* Creates clusterState by iterating over all k8s pods matching both labels
|
|
44
|
-
* app.kubernetes.io/name=teraslice
|
|
45
|
-
* app.kubernetes.io/instance=${clusterNameLabel}
|
|
46
|
-
* @constructor
|
|
47
|
-
* @return {Promise} [description]
|
|
48
|
-
*/
|
|
49
|
-
async _getClusterState() {
|
|
50
|
-
return this.k8s.list(`app.kubernetes.io/name=teraslice,app.kubernetes.io/instance=${this.clusterNameLabel}`, 'pods')
|
|
51
|
-
.then((k8sPods) => gen(k8sPods, this.clusterState))
|
|
52
|
-
.catch((err) => {
|
|
53
|
-
// TODO: We might need to do more here. I think it's OK to just
|
|
54
|
-
// log though. This only gets used to show slicer info through
|
|
55
|
-
// the API. We wouldn't want to disrupt the cluster master
|
|
56
|
-
// for rare failures to reach the k8s API.
|
|
57
|
-
logError(this.logger, err, 'Error listing teraslice pods in k8s');
|
|
58
|
-
});
|
|
59
|
-
}
|
|
60
|
-
/**
|
|
61
|
-
* Return value indicates whether the cluster has enough workers to start
|
|
62
|
-
* an execution. It must be able to allocate a slicer and at least one
|
|
63
|
-
* worker.
|
|
64
|
-
* @return {boolean} Ok to create job?
|
|
65
|
-
*/
|
|
66
|
-
readyForAllocation() {
|
|
67
|
-
// return _availableWorkers() >= 2;
|
|
68
|
-
// TODO: This will be addressed in the future, see:
|
|
69
|
-
// https://github.com/terascope/teraslice/issues/744
|
|
70
|
-
return true;
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Creates k8s Service and Job for the Teraslice Execution Controller
|
|
74
|
-
* (formerly slicer). This currently works by creating a service with a
|
|
75
|
-
* hostname that contains the exId in it listening on a well known port.
|
|
76
|
-
* The hostname and port are used later by the workers to contact this
|
|
77
|
-
* Execution Controller.
|
|
78
|
-
* @param {Object} execution Object containing execution details
|
|
79
|
-
* @return {Promise} [description]
|
|
80
|
-
*/
|
|
81
|
-
async allocateSlicer(ex) {
|
|
82
|
-
const execution = cloneDeep(ex);
|
|
83
|
-
execution.slicer_port = 45680;
|
|
84
|
-
const exJobResource = new K8sResource('jobs', 'execution_controller', this.context.sysconfig.teraslice, execution, this.logger);
|
|
85
|
-
const exJob = exJobResource.resource;
|
|
86
|
-
this.logger.debug(exJob, 'execution allocating slicer');
|
|
87
|
-
const jobResult = await this.k8s.post(exJob, 'job');
|
|
88
|
-
this.logger.debug(jobResult, 'k8s slicer job submitted');
|
|
89
|
-
let controllerLabel;
|
|
90
|
-
if (jobResult.spec.selector.matchLabels['controller-uid']) {
|
|
91
|
-
/// If running on kubernetes < v1.27.0
|
|
92
|
-
controllerLabel = 'controller-uid';
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
/// If running on kubernetes v1.27.0 or later
|
|
96
|
-
controllerLabel = 'batch.kubernetes.io/controller-uid';
|
|
97
|
-
}
|
|
98
|
-
const controllerUid = jobResult.spec.selector.matchLabels[controllerLabel];
|
|
99
|
-
const pod = await this.k8s.waitForSelectedPod(`${controllerLabel}=${controllerUid}`, undefined, this.context.sysconfig.teraslice.slicer_timeout);
|
|
100
|
-
this.logger.debug(`Slicer is using IP: ${pod.status.podIP}`);
|
|
101
|
-
execution.slicer_hostname = `${pod.status.podIP}`;
|
|
102
|
-
return execution;
|
|
103
|
-
}
|
|
104
|
-
/**
|
|
105
|
-
* Creates k8s deployment that executes Teraslice workers for specified
|
|
106
|
-
* Execution.
|
|
107
|
-
* @param {Object} execution Object that contains information of Execution
|
|
108
|
-
* @return {Promise} [description]
|
|
109
|
-
*/
|
|
110
|
-
async allocateWorkers(execution) {
|
|
111
|
-
// NOTE: I tried to set these on the execution inside allocateSlicer
|
|
112
|
-
// but these properties were gone by the time this was called, perhaps
|
|
113
|
-
// because they are not on the schema. So I do this k8s API call
|
|
114
|
-
// instead.
|
|
115
|
-
const selector = `app.kubernetes.io/component=execution_controller,teraslice.terascope.io/jobId=${execution.job_id}`;
|
|
116
|
-
const jobs = await pRetry(() => this.k8s.nonEmptyList(selector, 'jobs'), getRetryConfig());
|
|
117
|
-
// @ts-expect-error
|
|
118
|
-
execution.k8sName = jobs.items[0].metadata.name;
|
|
119
|
-
// @ts-expect-error
|
|
120
|
-
execution.k8sUid = jobs.items[0].metadata.uid;
|
|
121
|
-
const kr = new K8sResource('deployments', 'worker', this.context.sysconfig.teraslice, execution, this.logger);
|
|
122
|
-
const workerDeployment = kr.resource;
|
|
123
|
-
this.logger.debug(`workerDeployment:\n\n${JSON.stringify(workerDeployment, null, 2)}`);
|
|
124
|
-
return this.k8s.post(workerDeployment, 'deployment')
|
|
125
|
-
.then((result) => this.logger.debug(`k8s worker deployment submitted: ${JSON.stringify(result)}`))
|
|
126
|
-
.catch((err) => {
|
|
127
|
-
const error = new TSError(err, {
|
|
128
|
-
reason: 'Error submitting k8s worker deployment'
|
|
129
|
-
});
|
|
130
|
-
return Promise.reject(error);
|
|
131
|
-
});
|
|
132
|
-
}
|
|
133
|
-
// FIXME: These functions should probably do something with the response
|
|
134
|
-
// NOTE: I find is strange that the expected return value here is
|
|
135
|
-
// effectively the same as the function inputs
|
|
136
|
-
async addWorkers(executionContext, numWorkers) {
|
|
137
|
-
await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'add');
|
|
138
|
-
return { action: 'add', ex_id: executionContext.ex_id, workerNum: numWorkers };
|
|
139
|
-
}
|
|
140
|
-
// NOTE: This is passed exId instead of executionContext like addWorkers and
|
|
141
|
-
// removeWorkers. I don't know why, just dealing with it.
|
|
142
|
-
async removeWorkers(exId, numWorkers) {
|
|
143
|
-
await this.k8s.scaleExecution(exId, numWorkers, 'remove');
|
|
144
|
-
return { action: 'remove', ex_id: exId, workerNum: numWorkers };
|
|
145
|
-
}
|
|
146
|
-
// TODO: fix types here
|
|
147
|
-
async setWorkers(executionContext, numWorkers) {
|
|
148
|
-
await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'set');
|
|
149
|
-
return { action: 'set', ex_id: executionContext.ex_id, workerNum: numWorkers };
|
|
150
|
-
}
|
|
151
|
-
/**
|
|
152
|
-
* Stops all workers for exId
|
|
153
|
-
* @param {String} exId The execution ID of the Execution to stop
|
|
154
|
-
* @param {StopExecutionOptions} options force, timeout, and excludeNode
|
|
155
|
-
* force: stop all related pod, deployment, and job resources
|
|
156
|
-
* timeout and excludeNode are not used in k8s clustering.
|
|
157
|
-
* @return {Promise}
|
|
158
|
-
*/
|
|
159
|
-
async stopExecution(exId, options) {
|
|
160
|
-
return this.k8s.deleteExecution(exId, options?.force);
|
|
161
|
-
}
|
|
162
|
-
async shutdown() {
|
|
163
|
-
clearInterval(this.clusterStateInterval);
|
|
164
|
-
}
|
|
165
|
-
/**
|
|
166
|
-
* Returns a list of all k8s resources associated with a job ID
|
|
167
|
-
* @param {string} jobId The job ID of the job to list associated resources
|
|
168
|
-
* @returns {Array<any>}
|
|
169
|
-
*/
|
|
170
|
-
async listResourcesForJobId(jobId) {
|
|
171
|
-
const resources = [];
|
|
172
|
-
const resourceTypes = ['pods', 'deployments', 'services', 'jobs', 'replicasets'];
|
|
173
|
-
for (const type of resourceTypes) {
|
|
174
|
-
const list = await this.k8s.list(`teraslice.terascope.io/jobId=${jobId}`, type);
|
|
175
|
-
if (list.items.length > 0) {
|
|
176
|
-
resources.push(list.items);
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
return resources;
|
|
180
|
-
}
|
|
181
|
-
async initialize() {
|
|
182
|
-
this.logger.info('kubernetes clustering initializing');
|
|
183
|
-
// Periodically update cluster state, update period controlled by:
|
|
184
|
-
// context.sysconfig.teraslice.node_state_interval
|
|
185
|
-
this.clusterStateInterval = setInterval(() => {
|
|
186
|
-
this.logger.trace('cluster_master requesting cluster state update.');
|
|
187
|
-
this._getClusterState();
|
|
188
|
-
}, this.context.sysconfig.teraslice.node_state_interval);
|
|
189
|
-
await this.k8s.init();
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
//# sourceMappingURL=index.js.map
|