teraslice 2.17.4 → 3.0.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -394,7 +394,7 @@ export class ApiService {
394
394
  const requestHandler = handleTerasliceRequest(req, res, 'Could not get cluster statistics');
395
395
  requestHandler(async () => {
396
396
  const stats = executionService.getClusterAnalytics();
397
- // for backwards compatability
397
+ // for backwards compatibility
398
398
  // @ts-expect-error
399
399
  stats.slicer = stats.controllers;
400
400
  return stats;
@@ -416,7 +416,7 @@ export class ApiService {
416
416
  if (this.clusterType === 'native') {
417
417
  defaults = ['assignment', 'job_id', 'ex_id', 'node_id', 'pid'];
418
418
  }
419
- if (this.clusterType === 'kubernetes' || this.clusterType === 'kubernetesV2') {
419
+ if (this.clusterType === 'kubernetesV2') {
420
420
  defaults = ['assignment', 'job_id', 'ex_id', 'node_id', 'pod_name', 'image'];
421
421
  }
422
422
  const requestHandler = handleTerasliceRequest(req, res, 'Could not get all workers');
@@ -624,6 +624,7 @@ export class ApiService {
624
624
  }
625
625
  }
626
626
  }
627
+ // TODO: removing native clustering will remove the need for any here
627
628
  const clusterState = this.clusterService.getClusterState();
628
629
  /// Filter out information about kubernetes ex pods
629
630
  const filteredExecutions = {};
@@ -12,7 +12,7 @@ import { K8sDeploymentResource } from './k8sDeploymentResource.js';
12
12
  Exceptions
13
13
  rejected - when a job is rejected prior to scheduling
14
14
  failed - when there is an error while the job is running
15
- aborted - when a job was running at the point when the cluster shutsdown
15
+ aborted - when a job was running at the point when the cluster shuts down
16
16
  */
17
17
  export class KubernetesClusterBackendV2 {
18
18
  context;
@@ -1,14 +1,10 @@
1
1
  import { NativeClustering } from './backends/native/index.js';
2
- import { KubernetesClusterBackend } from './backends/kubernetes/index.js';
3
2
  import { KubernetesClusterBackendV2 } from './backends/kubernetesV2/index.js';
4
3
  export function makeClustering(context, { clusterMasterServer }) {
5
4
  const clusterType = context.sysconfig.teraslice.cluster_manager_type;
6
5
  if (clusterType === 'native') {
7
6
  return new NativeClustering(context, clusterMasterServer);
8
7
  }
9
- if (clusterType === 'kubernetes') {
10
- return new KubernetesClusterBackend(context, clusterMasterServer);
11
- }
12
8
  if (clusterType === 'kubernetesV2') {
13
9
  return new KubernetesClusterBackendV2(context, clusterMasterServer);
14
10
  }
@@ -386,7 +386,7 @@ export class ExecutionService {
386
386
  }));
387
387
  }
388
388
  const clusteringType = this.context.sysconfig.teraslice.cluster_manager_type;
389
- if (clusteringType === 'kubernetes' || clusteringType === 'kubernetesV2') {
389
+ if (clusteringType === 'kubernetesV2') {
390
390
  // Since this condition is only hit in cases where the pods
391
391
  // are never scheduled, all this call to stopExecution
392
392
  // accomplishes is to delete the k8s resources, which is
@@ -114,12 +114,14 @@ export class JobsService {
114
114
  statusCode: 409
115
115
  });
116
116
  }
117
- let currentResources = await this.executionService.listResourcesForJobId(jobId);
117
+ const currentResources = await this.executionService.listResourcesForJobId(jobId);
118
118
  if (currentResources.length > 0) {
119
- currentResources = currentResources.flat();
119
+ const flattenedResources = currentResources.flat();
120
120
  const exIdsSet = new Set();
121
- for (const resource of currentResources) {
122
- exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
121
+ for (const resource of flattenedResources) {
122
+ if (resource.metadata.labels) {
123
+ exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
124
+ }
123
125
  }
124
126
  const exIdsArr = Array.from(exIdsSet);
125
127
  const exIdsString = exIdsArr.join(', ');
@@ -202,12 +204,14 @@ export class JobsService {
202
204
  }
203
205
  // This will return any orphaned resources in k8s clustering
204
206
  // or an empty array in native clustering
205
- let currentResources = await this.executionService.listResourcesForJobId(jobId);
207
+ const currentResources = await this.executionService.listResourcesForJobId(jobId);
206
208
  if (currentResources.length > 0) {
207
- currentResources = currentResources.flat();
209
+ const flattenedResources = currentResources.flat();
208
210
  const exIdsSet = new Set();
209
- for (const resource of currentResources) {
210
- exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
211
+ for (const resource of flattenedResources) {
212
+ if (resource.metadata.labels) {
213
+ exIdsSet.add(resource.metadata.labels['teraslice.terascope.io/exId']);
214
+ }
211
215
  }
212
216
  const exIdsArr = Array.from(exIdsSet);
213
217
  const exIdsString = exIdsArr.join(', ');
@@ -209,7 +209,7 @@ export const schema = {
209
209
  cluster_manager_type: {
210
210
  doc: 'determines which cluster system should be used',
211
211
  default: 'native',
212
- format: ['native', 'kubernetes', 'kubernetesV2']
212
+ format: ['native', 'kubernetesV2']
213
213
  },
214
214
  cpu: {
215
215
  doc: 'number of cpus to reserve per teraslice worker in kubernetes',
@@ -27,16 +27,8 @@ export function shutdownHandler(context, shutdownFn) {
27
27
  || process.env.NODE_TYPE
28
28
  || process.env.assignment
29
29
  || 'unknown-assignment';
30
- const clusteringType = get(context, 'sysconfig.teraslice.cluster_manager_type');
31
- const isK8s = clusteringType === 'kubernetes' || clusteringType === 'kubernetesV2';
32
30
  // this is native clustering only
33
31
  const isProcessRestart = process.env.process_restart;
34
- // everything but the k8s execution_controller should not be allowed be allowed to
35
- // set a non-zero exit code (to avoid being restarted)
36
- // This is overridden in V2 because it can restart
37
- const allowNonZeroExitCode = !(isK8s
38
- && assignment === 'execution_controller'
39
- && context.sysconfig.teraslice.cluster_manager_type === 'kubernetes');
40
32
  const api = {
41
33
  exiting: false,
42
34
  exit
@@ -68,7 +60,7 @@ export function shutdownHandler(context, shutdownFn) {
68
60
  return `already shutting down, remaining ${ms(shutdownTimeout - elapsed)}`;
69
61
  }
70
62
  async function callShutdownFn(event, err) {
71
- // avoid failing before the promse is try / catched in pRaceWithTimeout
63
+ // avoid failing before the promise is try / catched in pRaceWithTimeout
72
64
  await pDelay(100);
73
65
  await shutdownFn(event, err);
74
66
  }
@@ -81,7 +73,6 @@ export function shutdownHandler(context, shutdownFn) {
81
73
  async function exit(event, err) {
82
74
  if (api.exiting)
83
75
  return;
84
- /// Potential logic for cluster_master and asset_service
85
76
  if (err) {
86
77
  if (err.name.includes('Error')) {
87
78
  setStatusCode(1);
@@ -97,15 +88,9 @@ export function shutdownHandler(context, shutdownFn) {
97
88
  }
98
89
  finally {
99
90
  await flushLogs();
100
- if (allowNonZeroExitCode) {
101
- const code = process.exitCode != null ? process.exitCode : 0;
102
- logger.info(`${assignment} shutdown took ${ms(Date.now() - startTime)}, exit with ${code} status code`);
103
- process.exit();
104
- }
105
- else {
106
- logger.info(`${assignment} shutdown took ${ms(Date.now() - startTime)}, exit with zero status code`);
107
- process.exit(0);
108
- }
91
+ const code = process.exitCode != null ? process.exitCode : 0;
92
+ logger.info(`${assignment} shutdown took ${ms(Date.now() - startTime)}, exit with ${code} status code`);
93
+ process.exit();
109
94
  }
110
95
  }
111
96
  function setStatusCode(code) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "teraslice",
3
3
  "displayName": "Teraslice",
4
- "version": "2.17.4",
4
+ "version": "3.0.0-dev.1",
5
5
  "description": "Distributed computing platform for processing JSON data",
6
6
  "homepage": "https://github.com/terascope/teraslice#readme",
7
7
  "bugs": {
@@ -40,11 +40,11 @@
40
40
  },
41
41
  "dependencies": {
42
42
  "@kubernetes/client-node": "~1.3.0",
43
- "@terascope/elasticsearch-api": "~4.13.1",
44
- "@terascope/job-components": "~1.12.4",
45
- "@terascope/teraslice-messaging": "~1.13.4",
46
- "@terascope/types": "~1.4.4",
47
- "@terascope/utils": "~1.10.4",
43
+ "@terascope/elasticsearch-api": "~5.0.0-dev.1",
44
+ "@terascope/job-components": "~2.0.0-dev.1",
45
+ "@terascope/teraslice-messaging": "~2.0.0-dev.1",
46
+ "@terascope/types": "~2.0.0-dev.1",
47
+ "@terascope/utils": "~2.0.0-dev.1",
48
48
  "async-mutex": "~0.5.0",
49
49
  "barbe": "~3.0.17",
50
50
  "body-parser": "~2.2.0",
@@ -57,17 +57,16 @@
57
57
  "get-port": "~7.1.0",
58
58
  "got": "~14.4.8",
59
59
  "ip": "~2.0.1",
60
- "kubernetes-client": "~9.0.0",
61
60
  "ms": "~2.1.3",
62
61
  "nanoid": "~5.1.5",
63
62
  "semver": "~7.7.2",
64
63
  "socket.io": "~1.7.4",
65
64
  "socket.io-client": "~1.7.4",
66
- "terafoundation": "~1.15.4",
65
+ "terafoundation": "~2.0.0-dev.1",
67
66
  "uuid": "~13.0.0"
68
67
  },
69
68
  "devDependencies": {
70
- "@terascope/opensearch-client": "~1.1.4",
69
+ "@terascope/opensearch-client": "~2.0.0-dev.1",
71
70
  "@types/archiver": "~6.0.3",
72
71
  "@types/express": "~5.0.3",
73
72
  "@types/gc-stats": "~1.4.3",
@@ -1,192 +0,0 @@
1
- import { TSError, logError, get, cloneDeep, pRetry } from '@terascope/utils';
2
- import { makeLogger } from '../../../../../workers/helpers/terafoundation.js';
3
- import { K8sResource } from './k8sResource.js';
4
- import { gen } from './k8sState.js';
5
- import { K8s } from './k8s.js';
6
- import { getRetryConfig } from './utils.js';
7
- /*
8
- Execution Life Cycle for _status
9
- pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
10
- Exceptions
11
- rejected - when a job is rejected prior to scheduling
12
- failed - when there is an error while the job is running
13
- aborted - when a job was running at the point when the cluster shutsdown
14
- */
15
- export class KubernetesClusterBackend {
16
- context;
17
- k8s;
18
- logger;
19
- clusterStateInterval;
20
- clusterState = {};
21
- clusterNameLabel;
22
- constructor(context, clusterMasterServer) {
23
- const kubernetesNamespace = get(context, 'sysconfig.teraslice.kubernetes_namespace', 'default');
24
- const clusterName = get(context, 'sysconfig.teraslice.name');
25
- this.context = context;
26
- this.logger = makeLogger(context, 'kubernetes_cluster_service');
27
- this.clusterNameLabel = clusterName.replace(/[^a-zA-Z0-9_\-.]/g, '_').substring(0, 63);
28
- this.clusterState = {};
29
- this.clusterStateInterval = undefined;
30
- this.k8s = new K8s(this.logger, null, kubernetesNamespace, context.sysconfig.teraslice.kubernetes_api_poll_delay, context.sysconfig.teraslice.shutdown_timeout);
31
- clusterMasterServer.onClientOnline((exId) => {
32
- this.logger.info(`execution ${exId} is connected`);
33
- });
34
- }
35
- /**
36
- * getClusterState returns a copy of the clusterState object
37
- * @return {Object} a copy of the clusterState object
38
- */
39
- getClusterState() {
40
- return cloneDeep(this.clusterState);
41
- }
42
- /**
43
- * Creates clusterState by iterating over all k8s pods matching both labels
44
- * app.kubernetes.io/name=teraslice
45
- * app.kubernetes.io/instance=${clusterNameLabel}
46
- * @constructor
47
- * @return {Promise} [description]
48
- */
49
- async _getClusterState() {
50
- return this.k8s.list(`app.kubernetes.io/name=teraslice,app.kubernetes.io/instance=${this.clusterNameLabel}`, 'pods')
51
- .then((k8sPods) => gen(k8sPods, this.clusterState))
52
- .catch((err) => {
53
- // TODO: We might need to do more here. I think it's OK to just
54
- // log though. This only gets used to show slicer info through
55
- // the API. We wouldn't want to disrupt the cluster master
56
- // for rare failures to reach the k8s API.
57
- logError(this.logger, err, 'Error listing teraslice pods in k8s');
58
- });
59
- }
60
- /**
61
- * Return value indicates whether the cluster has enough workers to start
62
- * an execution. It must be able to allocate a slicer and at least one
63
- * worker.
64
- * @return {boolean} Ok to create job?
65
- */
66
- readyForAllocation() {
67
- // return _availableWorkers() >= 2;
68
- // TODO: This will be addressed in the future, see:
69
- // https://github.com/terascope/teraslice/issues/744
70
- return true;
71
- }
72
- /**
73
- * Creates k8s Service and Job for the Teraslice Execution Controller
74
- * (formerly slicer). This currently works by creating a service with a
75
- * hostname that contains the exId in it listening on a well known port.
76
- * The hostname and port are used later by the workers to contact this
77
- * Execution Controller.
78
- * @param {Object} execution Object containing execution details
79
- * @return {Promise} [description]
80
- */
81
- async allocateSlicer(ex) {
82
- const execution = cloneDeep(ex);
83
- execution.slicer_port = 45680;
84
- const exJobResource = new K8sResource('jobs', 'execution_controller', this.context.sysconfig.teraslice, execution, this.logger);
85
- const exJob = exJobResource.resource;
86
- this.logger.debug(exJob, 'execution allocating slicer');
87
- const jobResult = await this.k8s.post(exJob, 'job');
88
- this.logger.debug(jobResult, 'k8s slicer job submitted');
89
- let controllerLabel;
90
- if (jobResult.spec.selector.matchLabels['controller-uid']) {
91
- /// If running on kubernetes < v1.27.0
92
- controllerLabel = 'controller-uid';
93
- }
94
- else {
95
- /// If running on kubernetes v1.27.0 or later
96
- controllerLabel = 'batch.kubernetes.io/controller-uid';
97
- }
98
- const controllerUid = jobResult.spec.selector.matchLabels[controllerLabel];
99
- const pod = await this.k8s.waitForSelectedPod(`${controllerLabel}=${controllerUid}`, undefined, this.context.sysconfig.teraslice.slicer_timeout);
100
- this.logger.debug(`Slicer is using IP: ${pod.status.podIP}`);
101
- execution.slicer_hostname = `${pod.status.podIP}`;
102
- return execution;
103
- }
104
- /**
105
- * Creates k8s deployment that executes Teraslice workers for specified
106
- * Execution.
107
- * @param {Object} execution Object that contains information of Execution
108
- * @return {Promise} [description]
109
- */
110
- async allocateWorkers(execution) {
111
- // NOTE: I tried to set these on the execution inside allocateSlicer
112
- // but these properties were gone by the time this was called, perhaps
113
- // because they are not on the schema. So I do this k8s API call
114
- // instead.
115
- const selector = `app.kubernetes.io/component=execution_controller,teraslice.terascope.io/jobId=${execution.job_id}`;
116
- const jobs = await pRetry(() => this.k8s.nonEmptyList(selector, 'jobs'), getRetryConfig());
117
- // @ts-expect-error
118
- execution.k8sName = jobs.items[0].metadata.name;
119
- // @ts-expect-error
120
- execution.k8sUid = jobs.items[0].metadata.uid;
121
- const kr = new K8sResource('deployments', 'worker', this.context.sysconfig.teraslice, execution, this.logger);
122
- const workerDeployment = kr.resource;
123
- this.logger.debug(`workerDeployment:\n\n${JSON.stringify(workerDeployment, null, 2)}`);
124
- return this.k8s.post(workerDeployment, 'deployment')
125
- .then((result) => this.logger.debug(`k8s worker deployment submitted: ${JSON.stringify(result)}`))
126
- .catch((err) => {
127
- const error = new TSError(err, {
128
- reason: 'Error submitting k8s worker deployment'
129
- });
130
- return Promise.reject(error);
131
- });
132
- }
133
- // FIXME: These functions should probably do something with the response
134
- // NOTE: I find is strange that the expected return value here is
135
- // effectively the same as the function inputs
136
- async addWorkers(executionContext, numWorkers) {
137
- await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'add');
138
- return { action: 'add', ex_id: executionContext.ex_id, workerNum: numWorkers };
139
- }
140
- // NOTE: This is passed exId instead of executionContext like addWorkers and
141
- // removeWorkers. I don't know why, just dealing with it.
142
- async removeWorkers(exId, numWorkers) {
143
- await this.k8s.scaleExecution(exId, numWorkers, 'remove');
144
- return { action: 'remove', ex_id: exId, workerNum: numWorkers };
145
- }
146
- // TODO: fix types here
147
- async setWorkers(executionContext, numWorkers) {
148
- await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'set');
149
- return { action: 'set', ex_id: executionContext.ex_id, workerNum: numWorkers };
150
- }
151
- /**
152
- * Stops all workers for exId
153
- * @param {String} exId The execution ID of the Execution to stop
154
- * @param {StopExecutionOptions} options force, timeout, and excludeNode
155
- * force: stop all related pod, deployment, and job resources
156
- * timeout and excludeNode are not used in k8s clustering.
157
- * @return {Promise}
158
- */
159
- async stopExecution(exId, options) {
160
- return this.k8s.deleteExecution(exId, options?.force);
161
- }
162
- async shutdown() {
163
- clearInterval(this.clusterStateInterval);
164
- }
165
- /**
166
- * Returns a list of all k8s resources associated with a job ID
167
- * @param {string} jobId The job ID of the job to list associated resources
168
- * @returns {Array<any>}
169
- */
170
- async listResourcesForJobId(jobId) {
171
- const resources = [];
172
- const resourceTypes = ['pods', 'deployments', 'services', 'jobs', 'replicasets'];
173
- for (const type of resourceTypes) {
174
- const list = await this.k8s.list(`teraslice.terascope.io/jobId=${jobId}`, type);
175
- if (list.items.length > 0) {
176
- resources.push(list.items);
177
- }
178
- }
179
- return resources;
180
- }
181
- async initialize() {
182
- this.logger.info('kubernetes clustering initializing');
183
- // Periodically update cluster state, update period controlled by:
184
- // context.sysconfig.teraslice.node_state_interval
185
- this.clusterStateInterval = setInterval(() => {
186
- this.logger.trace('cluster_master requesting cluster state update.');
187
- this._getClusterState();
188
- }, this.context.sysconfig.teraslice.node_state_interval);
189
- await this.k8s.init();
190
- }
191
- }
192
- //# sourceMappingURL=index.js.map