teraslice 0.87.1 → 0.88.0
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/cluster-service.js +24 -18
- package/dist/src/index.js +42 -0
- package/package.json +11 -15
- package/service.js +4 -6
- package/worker-service.js +6 -6
- package/index.js +0 -21
- package/lib/cluster/cluster_master.js +0 -164
- package/lib/cluster/node_master.js +0 -393
- package/lib/cluster/services/api.js +0 -581
- package/lib/cluster/services/assets.js +0 -211
- package/lib/cluster/services/cluster/backends/kubernetes/deployments/worker.hbs +0 -86
- package/lib/cluster/services/cluster/backends/kubernetes/index.js +0 -225
- package/lib/cluster/services/cluster/backends/kubernetes/jobs/execution_controller.hbs +0 -69
- package/lib/cluster/services/cluster/backends/kubernetes/k8s.js +0 -450
- package/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +0 -443
- package/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +0 -67
- package/lib/cluster/services/cluster/backends/kubernetes/utils.js +0 -58
- package/lib/cluster/services/cluster/backends/native/index.js +0 -611
- package/lib/cluster/services/cluster/backends/native/messaging.js +0 -563
- package/lib/cluster/services/cluster/backends/state-utils.js +0 -49
- package/lib/cluster/services/cluster/index.js +0 -15
- package/lib/cluster/services/execution.js +0 -459
- package/lib/cluster/services/jobs.js +0 -303
- package/lib/config/default-sysconfig.js +0 -47
- package/lib/config/index.js +0 -32
- package/lib/config/schemas/system.js +0 -333
- package/lib/processors/save_file/index.js +0 -9
- package/lib/processors/save_file/processor.js +0 -17
- package/lib/processors/save_file/schema.js +0 -17
- package/lib/processors/script.js +0 -130
- package/lib/processors/stdout/index.js +0 -9
- package/lib/processors/stdout/processor.js +0 -19
- package/lib/processors/stdout/schema.js +0 -18
- package/lib/storage/analytics.js +0 -106
- package/lib/storage/assets.js +0 -275
- package/lib/storage/backends/elasticsearch_store.js +0 -567
- package/lib/storage/backends/mappings/analytics.json +0 -49
- package/lib/storage/backends/mappings/asset.json +0 -40
- package/lib/storage/backends/mappings/ex.json +0 -55
- package/lib/storage/backends/mappings/job.json +0 -31
- package/lib/storage/backends/mappings/state.json +0 -37
- package/lib/storage/execution.js +0 -331
- package/lib/storage/index.js +0 -16
- package/lib/storage/jobs.js +0 -97
- package/lib/storage/state.js +0 -302
- package/lib/utils/api_utils.js +0 -173
- package/lib/utils/asset_utils.js +0 -117
- package/lib/utils/date_utils.js +0 -58
- package/lib/utils/encoding_utils.js +0 -29
- package/lib/utils/events.js +0 -7
- package/lib/utils/file_utils.js +0 -118
- package/lib/utils/id_utils.js +0 -19
- package/lib/utils/port_utils.js +0 -83
- package/lib/workers/assets/loader.js +0 -109
- package/lib/workers/assets/spawn.js +0 -78
- package/lib/workers/context/execution-context.js +0 -16
- package/lib/workers/context/terafoundation-context.js +0 -10
- package/lib/workers/execution-controller/execution-analytics.js +0 -211
- package/lib/workers/execution-controller/index.js +0 -1033
- package/lib/workers/execution-controller/recovery.js +0 -188
- package/lib/workers/execution-controller/scheduler.js +0 -461
- package/lib/workers/execution-controller/slice-analytics.js +0 -115
- package/lib/workers/helpers/job.js +0 -93
- package/lib/workers/helpers/op-analytics.js +0 -22
- package/lib/workers/helpers/terafoundation.js +0 -43
- package/lib/workers/helpers/worker-shutdown.js +0 -187
- package/lib/workers/metrics/index.js +0 -139
- package/lib/workers/worker/index.js +0 -344
- package/lib/workers/worker/slice.js +0 -143

package/lib/cluster/services/assets.js
@@ -1,211 +0,0 @@
-'use strict';
-
-const express = require('express');
-const {
-    TSError, parseErrorInfo, logError, toBoolean
-} = require('@terascope/utils');
-const { makeLogger } = require('../../workers/helpers/terafoundation');
-const makeAssetsStore = require('../../storage/assets');
-const {
-    makeTable,
-    handleRequest,
-    getSearchOptions,
-    sendError,
-} = require('../../utils/api_utils');
-
-module.exports = function assetsService(context) {
-    const logger = makeLogger(context, 'assets_service');
-    const app = express();
-
-    let assetsStore;
-    let running = false;
-
-    app.set('json spaces', 4);
-
-    app.use((req, res, next) => {
-        req.logger = logger;
-        next();
-    });
-
-    app.get('/status', (req, res) => {
-        const requestHandler = handleRequest(req, res);
-        requestHandler(() => ({ available: running }));
-    });
-
-    app.post('/assets', (req, res) => {
-        const blocking = toBoolean(req.query.blocking);
-        logger.debug('loading an asset', { blocking });
-
-        const results = [];
-
-        req.on('data', (buff) => {
-            results.push(buff);
-        });
-
-        req.on('end', () => {
-            const data = Buffer.concat(results);
-            assetsStore.save(data, blocking)
-                .then(({ assetId, created }) => {
-                    const code = created ? 201 : 200;
-                    res.status(code).json({
-                        _id: assetId
-                    });
-                })
-                .catch((err) => {
-                    const { statusCode, message } = parseErrorInfo(err);
-                    logError(logger, err, 'failure saving assets via proxy request');
-                    sendError(res, statusCode, message);
-                });
-        });
-
-        req.on('error', (err) => {
-            const { statusCode, message } = parseErrorInfo(err);
-            logError(logger, err, 'failure writing asset');
-            res.status(statusCode).send(message);
-        });
-    });
-
-    app.delete('/assets/:assetId', (req, res) => {
-        const { assetId } = req.params;
-        const requestHandler = handleRequest(req, res, `Could not delete asset ${assetId}`);
-
-        if (assetId.length !== 40) {
-            res.status(400).json({
-                error: `asset ${assetId} is not formatted correctly, please provide the full asset_id`
-            });
-        } else {
-            requestHandler(async () => {
-                await assetsStore.remove(assetId);
-                return { _id: assetId };
-            });
-        }
-    });
-
-    app.get('/txt/assets', (req, res) => {
-        const query = 'id:*';
-        createAssetTable(query, req, res);
-    });
-
-    app.get('/txt/assets/:name', (req, res) => {
-        const query = `id:* AND name:"${req.params.name}"`;
-        createAssetTable(query, req, res);
-    });
-
-    app.get('/txt/assets/:name/:version', (req, res) => {
-        const query = `id:* AND name:"${req.params.name}" AND version:"${req.params.version}"`;
-        createAssetTable(query, req, res);
-    });
-
-    app.get('/assets', (req, res) => {
-        const query = 'id:*';
-        assetsSearch(query, req, res);
-    });
-
-    app.get('/assets/:name', (req, res) => {
-        const query = `id:* AND name:"${req.params.name}"`;
-        assetsSearch(query, req, res);
-    });
-
-    app.get('/assets/:name/:version', (req, res) => {
-        const query = `id:* AND name:"${req.params.name}" AND version:"${req.params.version}"`;
-        assetsSearch(query, req, res);
-    });
-
-    function createAssetTable(query, req, res) {
-        const { size, from, sort } = getSearchOptions(req, '_created:desc');
-
-        const defaults = [
-            'name',
-            'version',
-            'id',
-            '_created',
-            'description',
-            'node_version',
-            'platform',
-            'arch'
-        ];
-
-        function mapping(item) {
-            return (field) => {
-                if (field === 'description') {
-                    return item[field] ? item[field].slice(0, 30) : item[field];
-                }
-                return item[field];
-            };
-        }
-
-        const requestHandler = handleRequest(req, res, 'Could not get assets');
-        requestHandler(async () => {
-            const results = await assetsStore.search(query, from, size, sort, defaults);
-            const assets = results.hits.hits.map((asset) => {
-                const record = asset._source;
-                record.id = asset._id;
-                return record;
-            });
-            return makeTable(req, defaults, assets, mapping);
-        });
-    }
-
-    function assetsSearch(query, req, res) {
-        const { size, from, sort } = getSearchOptions(req, '_created:desc');
-
-        const requestHandler = handleRequest(req, res, 'Could not get assets');
-        requestHandler(async () => {
-            const fields = ['_created', 'name', 'version', 'description', 'node_version', 'platform', 'arch'];
-            const results = await assetsStore.search(query, from, size, sort, fields);
-            return results.hits.hits.map((asset) => {
-                const record = asset._source;
-                record.id = asset._id;
-                return record;
-            });
-        });
-    }
-
-    const { port } = process.env;
-    return {
-        async initialize() {
-            try {
-                assetsStore = await makeAssetsStore(context);
-
-                await new Promise((resolve, reject) => {
-                    app.listen(port, (err) => {
-                        if (err) {
-                            reject(err);
-                            return;
-                        }
-                        logger.info(`assets_service is listening on port ${port}`);
-                        resolve();
-                    });
-                    app.timeout = context.sysconfig.teraslice.api_response_timeout;
-                });
-
-                await assetsStore.autoload();
-                running = true;
-            } catch (err) {
-                running = false;
-                throw new TSError(err, {
-                    reason: 'Failure while creating assets_service'
-                });
-            }
-        },
-        run() {
-            return new Promise((resolve) => {
-                if (!running) {
-                    resolve();
-                    return;
-                }
-                const runningInterval = setInterval(() => {
-                    if (!running) {
-                        clearInterval(runningInterval);
-                        resolve();
-                    }
-                }, 1000);
-            });
-        },
-        async shutdown() {
-            running = false;
-            if (!assetsStore) return;
-            await assetsStore.shutdown(true);
-        }
-    };
-};

package/lib/cluster/services/cluster/backends/kubernetes/deployments/worker.hbs
@@ -1,86 +0,0 @@
-{
-  "apiVersion": "apps/v1",
-  "kind": "Deployment",
-  "metadata": {
-    "labels": {
-      "app.kubernetes.io/name": "teraslice",
-      "app.kubernetes.io/component": "{{nodeType}}",
-      "teraslice.terascope.io/exId": "{{exId}}",
-      "teraslice.terascope.io/jobId": "{{jobId}}",
-      "teraslice.terascope.io/jobName": "{{jobNameLabel}}",
-      "app.kubernetes.io/instance": "{{clusterNameLabel}}"
-    },
-    "name": "{{name}}",
-    "namespace": "{{namespace}}",
-    "ownerReferences": [
-      {
-        "apiVersion": "batch/v1",
-        "controller": false,
-        "blockOwnerDeletion": false,
-        "kind": "Job",
-        "name": "{{exName}}",
-        "uid": "{{exUid}}"
-      }
-    ]
-  },
-  "spec": {
-    "replicas": {{replicas}},
-    "template": {
-      "metadata": {
-        "labels": {
-          "app.kubernetes.io/name": "teraslice",
-          "app.kubernetes.io/component": "{{nodeType}}",
-          "teraslice.terascope.io/exId": "{{exId}}",
-          "teraslice.terascope.io/jobId": "{{jobId}}",
-          "teraslice.terascope.io/jobName": "{{jobNameLabel}}",
-          "app.kubernetes.io/instance": "{{clusterNameLabel}}"
-        }
-      },
-      "spec": {
-        "containers": [{
-          "image": "{{dockerImage}}",
-          "name": "{{name}}",
-          "ports": [{ "containerPort": 45680 }],
-          "volumeMounts": [{
-            "mountPath": "/app/config",
-            "name": "config"
-          }],
-          "env": [{
-            "name": "NODE_TYPE",
-            "value": "{{nodeType}}"
-          },
-          {
-            "name": "EX",
-            "value": "{{execution}}"
-          },
-          {
-            "name": "POD_IP",
-            "valueFrom": {
-              "fieldRef": {
-                "fieldPath": "status.podIP"
-              }
-            }
-          }]
-        }],
-        "volumes": [{
-          "name": "config",
-          "configMap": {
-            "name": "{{configMapName}}",
-            "items": [{
-              "key": "teraslice.yaml",
-              "path": "teraslice.yaml"
-            }]
-          }
-        }],
-        "terminationGracePeriodSeconds": {{shutdownTimeout}}
-      }
-    },
-    "selector": {
-      "matchLabels": {
-        "app.kubernetes.io/name": "teraslice",
-        "app.kubernetes.io/component": "{{nodeType}}",
-        "teraslice.terascope.io/exId": "{{exId}}"
-      }
-    }
-  }
-}

package/lib/cluster/services/cluster/backends/kubernetes/index.js
@@ -1,225 +0,0 @@
-'use strict';
-
-const {
-    TSError, logError, get, cloneDeep, pRetry
-} = require('@terascope/utils');
-const { makeLogger } = require('../../../../../workers/helpers/terafoundation');
-const K8sResource = require('./k8sResource');
-const k8sState = require('./k8sState');
-const K8s = require('./k8s');
-const { getRetryConfig } = require('./utils');
-
-/*
- Execution Life Cycle for _status
- pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
- Exceptions
- rejected - when a job is rejected prior to scheduling
- failed - when there is an error while the job is running
- aborted - when a job was running at the point when the cluster shutsdown
-*/
-
-module.exports = function kubernetesClusterBackend(context, clusterMasterServer) {
-    const logger = makeLogger(context, 'kubernetes_cluster_service');
-
-    const clusterName = get(context, 'sysconfig.teraslice.name');
-    const clusterNameLabel = clusterName.replace(/[^a-zA-Z0-9_\-.]/g, '_').substring(0, 63);
-    const kubernetesNamespace = get(context, 'sysconfig.teraslice.kubernetes_namespace', 'default');
-
-    const clusterState = {};
-    let clusterStateInterval = null;
-
-    const k8s = new K8s(
-        logger,
-        null,
-        kubernetesNamespace,
-        context.sysconfig.teraslice.kubernetes_api_poll_delay,
-        context.sysconfig.teraslice.shutdown_timeout
-    );
-
-    clusterMasterServer.onClientOnline((exId) => {
-        logger.info(`execution ${exId} is connected`);
-    });
-
-    /**
-     * getClusterState returns a copy of the clusterState object
-     * @return {Object} a copy of the clusterState object
-     */
-    function getClusterState() {
-        return cloneDeep(clusterState);
-    }
-
-    /**
-     * Creates clusterState by iterating over all k8s pods matching both labels
-     *   app.kubernetes.io/name=teraslice
-     *   app.kubernetes.io/instance=${clusterNameLabel}
-     * @constructor
-     * @return {Promise} [description]
-     */
-    function _getClusterState() {
-        return k8s.list(`app.kubernetes.io/name=teraslice,app.kubernetes.io/instance=${clusterNameLabel}`, 'pods')
-            .then((k8sPods) => k8sState.gen(k8sPods, clusterState, clusterNameLabel))
-            .catch((err) => {
-                // TODO: We might need to do more here. I think it's OK to just
-                // log though. This only gets used to show slicer info through
-                // the API. We wouldn't want to disrupt the cluster master
-                // for rare failures to reach the k8s API.
-                logError(logger, err, 'Error listing teraslice pods in k8s');
-            });
-    }
-
-    /**
-     * Return value indicates whether the cluster has enough workers to start
-     * an execution. It must be able to allocate a slicer and at least one
-     * worker.
-     * @return {boolean} Ok to create job?
-     */
-    function readyForAllocation() {
-        // return _availableWorkers() >= 2;
-        // TODO: This will be addressed in the future, see:
-        //   https://github.com/terascope/teraslice/issues/744
-        return true;
-    }
-
-    /**
-     * Creates k8s Service and Job for the Teraslice Execution Controller
-     * (formerly slicer). This currently works by creating a service with a
-     * hostname that contains the exId in it listening on a well known port.
-     * The hostname and port are used later by the workers to contact this
-     * Execution Controller.
-     * @param {Object} execution Object containing execution details
-     * @return {Promise} [description]
-     */
-    async function allocateSlicer(ex) {
-        const execution = cloneDeep(ex);
-
-        execution.slicer_port = 45680;
-        const exJobResource = new K8sResource(
-            'jobs', 'execution_controller', context.sysconfig.teraslice, execution, logger
-        );
-        const exJob = exJobResource.resource;
-
-        logger.debug(exJob, 'execution allocating slicer');
-
-        const jobResult = await k8s.post(exJob, 'job');
-        logger.debug(jobResult, 'k8s slicer job submitted');
-
-        let controllerLabel;
-        if (jobResult.spec.selector.matchLabels['controller-uid']) {
-            /// If running on kubernetes < v1.27.0
-            controllerLabel = 'controller-uid';
-        } else {
-            /// If running on kubernetes v1.27.0 or later
-            controllerLabel = 'batch.kubernetes.io/controller-uid';
-        }
-
-        const controllerUid = jobResult.spec.selector.matchLabels[controllerLabel];
-
-        const pod = await k8s.waitForSelectedPod(
-            `${controllerLabel}=${controllerUid}`,
-            null,
-            context.sysconfig.teraslice.slicer_timeout
-        );
-
-        logger.debug(`Slicer is using IP: ${pod.status.podIP}`);
-        execution.slicer_hostname = `${pod.status.podIP}`;
-
-        return execution;
-    }
-
-    /**
-     * Creates k8s deployment that executes Teraslice workers for specified
-     * Execution.
-     * @param {Object} execution Object that contains information of Execution
-     * @return {Promise} [description]
-     */
-    async function allocateWorkers(execution) {
-        // NOTE: I tried to set these on the execution inside allocateSlicer
-        // but these properties were gone by the time this was called, perhaps
-        // because they are not on the schema. So I do this k8s API call
-        // instead.
-        const selector = `app.kubernetes.io/component=execution_controller,teraslice.terascope.io/jobId=${execution.job_id}`;
-        const jobs = await pRetry(
-            () => k8s.nonEmptyList(selector, 'jobs'), getRetryConfig
-        );
-        execution.k8sName = jobs.items[0].metadata.name;
-        execution.k8sUid = jobs.items[0].metadata.uid;
-
-        const kr = new K8sResource(
-            'deployments', 'worker', context.sysconfig.teraslice, execution, logger
-        );
-
-        const workerDeployment = kr.resource;
-
-        logger.debug(`workerDeployment:\n\n${JSON.stringify(workerDeployment, null, 2)}`);
-
-        return k8s.post(workerDeployment, 'deployment')
-            .then((result) => logger.debug(`k8s worker deployment submitted: ${JSON.stringify(result)}`))
-            .catch((err) => {
-                const error = new TSError(err, {
-                    reason: 'Error submitting k8s worker deployment'
-                });
-                return Promise.reject(error);
-            });
-    }
-
-    // FIXME: These functions should probably do something with the response
-    // NOTE: I find is strange that the expected return value here is
-    // effectively the same as the function inputs
-    async function addWorkers(executionContext, numWorkers) {
-        await k8s.scaleExecution(executionContext.ex_id, numWorkers, 'add');
-        return { action: 'add', ex_id: executionContext.ex_id, workerNum: numWorkers };
-    }
-
-    // NOTE: This is passed exId instead of executionContext like addWorkers and
-    // removeWorkers. I don't know why, just dealing with it.
-    async function removeWorkers(exId, numWorkers) {
-        await k8s.scaleExecution(exId, numWorkers, 'remove');
-        return { action: 'remove', ex_id: exId, workerNum: numWorkers };
-    }
-
-    async function setWorkers(executionContext, numWorkers) {
-        await k8s.scaleExecution(executionContext.ex_id, numWorkers, 'set');
-        return { action: 'set', ex_id: executionContext.ex_id, workerNum: numWorkers };
-    }
-
-    /**
-     * Stops all workers for exId
-     * @param {string} exId The execution ID of the Execution to stop
-     * @return {Promise}
-     */
-
-    async function stopExecution(exId) {
-        return k8s.deleteExecution(exId);
-    }
-
-    async function shutdown() {
-        clearInterval(clusterStateInterval);
-    }
-
-    async function initialize() {
-        logger.info('kubernetes clustering initializing');
-
-        // Periodically update cluster state, update period controlled by:
-        //   context.sysconfig.teraslice.node_state_interval
-        clusterStateInterval = setInterval(() => {
-            logger.trace('cluster_master requesting cluster state update.');
-            _getClusterState();
-        }, context.sysconfig.teraslice.node_state_interval);
-
-        await k8s.init();
-    }
-
-    return {
-        getClusterState,
-        allocateWorkers,
-        allocateSlicer,
-        initialize,
-        shutdown,
-        stopExecution,
-        removeWorkers,
-        addWorkers,
-        setWorkers,
-        readyForAllocation,
-        // clusterAvailable TODO: return false if k8s API unavailable, not in use
-    };
-};

package/lib/cluster/services/cluster/backends/kubernetes/jobs/execution_controller.hbs
@@ -1,69 +0,0 @@
-{
-  "apiVersion": "batch/v1",
-  "kind": "Job",
-  "metadata": {
-    "labels": {
-      "app.kubernetes.io/name": "teraslice",
-      "app.kubernetes.io/component": "{{nodeType}}",
-      "teraslice.terascope.io/exId": "{{exId}}",
-      "teraslice.terascope.io/jobId": "{{jobId}}",
-      "teraslice.terascope.io/jobName": "{{jobNameLabel}}",
-      "app.kubernetes.io/instance": "{{clusterNameLabel}}"
-    },
-    "name": "{{name}}",
-    "namespace": "{{namespace}}"
-  },
-  "spec": {
-    "template": {
-      "metadata": {
-        "labels": {
-          "app.kubernetes.io/name": "teraslice",
-          "app.kubernetes.io/component": "{{nodeType}}",
-          "teraslice.terascope.io/exId": "{{exId}}",
-          "teraslice.terascope.io/jobId": "{{jobId}}",
-          "teraslice.terascope.io/jobName": "{{jobNameLabel}}",
-          "app.kubernetes.io/instance": "{{clusterNameLabel}}"
-        }
-      },
-      "spec": {
-        "containers": [{
-          "image": "{{dockerImage}}",
-          "name": "{{name}}",
-          "ports": [{ "containerPort": 45680 }],
-          "volumeMounts": [{
-            "mountPath": "/app/config",
-            "name": "config"
-          }],
-          "env": [{
-            "name": "NODE_TYPE",
-            "value": "{{nodeType}}"
-          },
-          {
-            "name": "EX",
-            "value": "{{execution}}"
-          },
-          {
-            "name": "POD_IP",
-            "valueFrom": {
-              "fieldRef": {
-                "fieldPath": "status.podIP"
-              }
-            }
-          }]
-        }],
-        "volumes": [{
-          "name": "config",
-          "configMap": {
-            "name": "{{configMapName}}",
-            "items": [{
-              "key": "teraslice.yaml",
-              "path": "teraslice.yaml"
-            }]
-          }
-        }],
-        "terminationGracePeriodSeconds": {{shutdownTimeout}},
-        "restartPolicy": "Never"
-      }
-    }
-  }
-}