teraslice 2.11.0 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/dist/src/interfaces.js +12 -0
  2. package/dist/src/lib/cluster/cluster_master.js +246 -0
  3. package/dist/src/lib/cluster/node_master.js +355 -0
  4. package/dist/src/lib/cluster/services/api.js +663 -0
  5. package/dist/src/lib/cluster/services/assets.js +226 -0
  6. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/index.js +192 -0
  7. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8s.js +481 -0
  8. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +414 -0
  9. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +59 -0
  10. package/dist/src/lib/cluster/services/cluster/backends/kubernetes/utils.js +43 -0
  11. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js +192 -0
  12. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/interfaces.js +2 -0
  13. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.js +423 -0
  14. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sDeploymentResource.js +60 -0
  15. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sJobResource.js +55 -0
  16. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sResource.js +359 -0
  17. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sServiceResource.js +37 -0
  18. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sState.js +60 -0
  19. package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/utils.js +170 -0
  20. package/dist/src/lib/cluster/services/cluster/backends/native/dispatch.js +13 -0
  21. package/dist/src/lib/cluster/services/cluster/backends/native/index.js +526 -0
  22. package/dist/src/lib/cluster/services/cluster/backends/native/messaging.js +547 -0
  23. package/dist/src/lib/cluster/services/cluster/backends/state-utils.js +26 -0
  24. package/dist/src/lib/cluster/services/cluster/index.js +17 -0
  25. package/dist/src/lib/cluster/services/execution.js +435 -0
  26. package/dist/src/lib/cluster/services/index.js +6 -0
  27. package/dist/src/lib/cluster/services/interfaces.js +2 -0
  28. package/dist/src/lib/cluster/services/jobs.js +454 -0
  29. package/dist/src/lib/config/default-sysconfig.js +26 -0
  30. package/dist/src/lib/config/index.js +22 -0
  31. package/dist/src/lib/config/schemas/system.js +360 -0
  32. package/dist/src/lib/storage/analytics.js +86 -0
  33. package/dist/src/lib/storage/assets.js +401 -0
  34. package/dist/src/lib/storage/backends/elasticsearch_store.js +494 -0
  35. package/dist/src/lib/storage/backends/mappings/analytics.js +50 -0
  36. package/dist/src/lib/storage/backends/mappings/asset.js +41 -0
  37. package/dist/src/lib/storage/backends/mappings/ex.js +62 -0
  38. package/dist/src/lib/storage/backends/mappings/job.js +38 -0
  39. package/dist/src/lib/storage/backends/mappings/state.js +38 -0
  40. package/dist/src/lib/storage/backends/s3_store.js +237 -0
  41. package/dist/src/lib/storage/execution.js +300 -0
  42. package/dist/src/lib/storage/index.js +7 -0
  43. package/dist/src/lib/storage/jobs.js +81 -0
  44. package/dist/src/lib/storage/state.js +255 -0
  45. package/dist/src/lib/utils/api_utils.js +157 -0
  46. package/dist/src/lib/utils/asset_utils.js +94 -0
  47. package/dist/src/lib/utils/date_utils.js +52 -0
  48. package/dist/src/lib/utils/encoding_utils.js +27 -0
  49. package/dist/src/lib/utils/events.js +4 -0
  50. package/dist/src/lib/utils/file_utils.js +124 -0
  51. package/dist/src/lib/utils/id_utils.js +15 -0
  52. package/dist/src/lib/utils/port_utils.js +32 -0
  53. package/dist/src/lib/workers/assets/index.js +3 -0
  54. package/dist/src/lib/workers/assets/loader-executable.js +40 -0
  55. package/dist/src/lib/workers/assets/loader.js +73 -0
  56. package/dist/src/lib/workers/assets/spawn.js +55 -0
  57. package/dist/src/lib/workers/context/execution-context.js +12 -0
  58. package/dist/src/lib/workers/context/terafoundation-context.js +8 -0
  59. package/dist/src/lib/workers/execution-controller/execution-analytics.js +188 -0
  60. package/dist/src/lib/workers/execution-controller/index.js +1024 -0
  61. package/dist/src/lib/workers/execution-controller/recovery.js +151 -0
  62. package/dist/src/lib/workers/execution-controller/scheduler.js +390 -0
  63. package/dist/src/lib/workers/execution-controller/slice-analytics.js +96 -0
  64. package/dist/src/lib/workers/helpers/job.js +80 -0
  65. package/dist/src/lib/workers/helpers/op-analytics.js +22 -0
  66. package/dist/src/lib/workers/helpers/terafoundation.js +34 -0
  67. package/dist/src/lib/workers/helpers/worker-shutdown.js +169 -0
  68. package/dist/src/lib/workers/metrics/index.js +108 -0
  69. package/dist/src/lib/workers/worker/index.js +378 -0
  70. package/dist/src/lib/workers/worker/slice.js +122 -0
  71. package/dist/test/config/schemas/system_schema-spec.js +37 -0
  72. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.js +316 -0
  73. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sResource-spec.js +795 -0
  74. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-multicluster-spec.js +67 -0
  75. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/k8sState-spec.js +84 -0
  76. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/utils-spec.js +132 -0
  77. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.js +455 -0
  78. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sResource-v2-spec.js +818 -0
  79. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-multicluster-v2-spec.js +67 -0
  80. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8sState-v2-spec.js +84 -0
  81. package/dist/test/lib/cluster/services/cluster/backends/kubernetes/v2/utils-v2-spec.js +320 -0
  82. package/dist/test/lib/cluster/services/cluster/backends/state-utils-spec.js +37 -0
  83. package/dist/test/node_master-spec.js +188 -0
  84. package/dist/test/services/api-spec.js +80 -0
  85. package/dist/test/services/assets-spec.js +158 -0
  86. package/dist/test/services/messaging-spec.js +440 -0
  87. package/dist/test/storage/assets_storage-spec.js +95 -0
  88. package/dist/test/storage/s3_store-spec.js +138 -0
  89. package/dist/test/test.config.js +8 -0
  90. package/dist/test/test.setup.js +6 -0
  91. package/dist/test/utils/api_utils-spec.js +86 -0
  92. package/dist/test/utils/asset_utils-spec.js +141 -0
  93. package/dist/test/utils/elastic_utils-spec.js +25 -0
  94. package/dist/test/workers/execution-controller/execution-controller-spec.js +371 -0
  95. package/dist/test/workers/execution-controller/execution-special-test-cases-spec.js +520 -0
  96. package/dist/test/workers/execution-controller/execution-test-cases-spec.js +338 -0
  97. package/dist/test/workers/execution-controller/recovery-spec.js +160 -0
  98. package/dist/test/workers/execution-controller/scheduler-spec.js +249 -0
  99. package/dist/test/workers/execution-controller/slice-analytics-spec.js +121 -0
  100. package/dist/test/workers/fixtures/ops/example-op/processor.js +20 -0
  101. package/dist/test/workers/fixtures/ops/example-op/schema.js +19 -0
  102. package/dist/test/workers/fixtures/ops/example-reader/fetcher.js +20 -0
  103. package/dist/test/workers/fixtures/ops/example-reader/schema.js +41 -0
  104. package/dist/test/workers/fixtures/ops/example-reader/slicer.js +37 -0
  105. package/dist/test/workers/fixtures/ops/new-op/processor.js +29 -0
  106. package/dist/test/workers/fixtures/ops/new-op/schema.js +18 -0
  107. package/dist/test/workers/fixtures/ops/new-reader/fetcher.js +19 -0
  108. package/dist/test/workers/fixtures/ops/new-reader/schema.js +23 -0
  109. package/dist/test/workers/fixtures/ops/new-reader/slicer.js +13 -0
  110. package/dist/test/workers/helpers/configs.js +130 -0
  111. package/dist/test/workers/helpers/execution-controller-helper.js +49 -0
  112. package/dist/test/workers/helpers/index.js +5 -0
  113. package/dist/test/workers/helpers/test-context.js +210 -0
  114. package/dist/test/workers/helpers/zip-directory.js +25 -0
  115. package/dist/test/workers/worker/slice-spec.js +333 -0
  116. package/dist/test/workers/worker/worker-spec.js +356 -0
  117. package/package.json +94 -94
  118. package/service.js +0 -0
package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/index.js
@@ -0,0 +1,192 @@
+ import { TSError, logError, get, cloneDeep, pRetry } from '@terascope/utils';
+ import { makeLogger } from '../../../../../workers/helpers/terafoundation.js';
+ import { gen } from './k8sState.js';
+ import { K8s } from './k8s.js';
+ import { getRetryConfig } from './utils.js';
+ import { K8sJobResource } from './k8sJobResource.js';
+ import { K8sServiceResource } from './k8sServiceResource.js';
+ import { K8sDeploymentResource } from './k8sDeploymentResource.js';
+ /*
+  Execution Life Cycle for _status
+      pending -> scheduling -> running -> [ paused -> running ] -> [ stopped | completed ]
+  Exceptions
+      rejected - when a job is rejected prior to scheduling
+      failed - when there is an error while the job is running
+      aborted - when a job was running at the point when the cluster shuts down
+ */
+ export class KubernetesClusterBackendV2 {
+     context;
+     k8s;
+     logger;
+     clusterStateInterval;
+     clusterState = {};
+     clusterNameLabel;
+     constructor(context, clusterMasterServer) {
+         const kubernetesNamespace = get(context, 'sysconfig.teraslice.kubernetes_namespace', 'default');
+         const clusterName = get(context, 'sysconfig.teraslice.name');
+         this.context = context;
+         this.logger = makeLogger(context, 'kubernetesV2_cluster_service');
+         this.clusterNameLabel = clusterName.replace(/[^a-zA-Z0-9_\-.]/g, '_').substring(0, 63);
+         this.clusterState = {};
+         this.clusterStateInterval = undefined;
+         this.k8s = new K8s(this.logger, null, kubernetesNamespace, context.sysconfig.teraslice.kubernetes_api_poll_delay, context.sysconfig.teraslice.shutdown_timeout);
+         clusterMasterServer.onClientOnline((exId) => {
+             this.logger.info(`execution ${exId} is connected`);
+         });
+     }
+     /**
+      * getClusterState returns a copy of the clusterState object
+      * @return {Object} a copy of the clusterState object
+      */
+     getClusterState() {
+         return cloneDeep(this.clusterState);
+     }
+     /**
+      * Creates clusterState by iterating over all k8s pods matching both labels
+      *     app.kubernetes.io/name=teraslice
+      *     app.kubernetes.io/instance=${clusterNameLabel}
+      * @constructor
+      * @return {Promise} void
+      */
+     async _getClusterState() {
+         return this.k8s.list(`app.kubernetes.io/name=teraslice,app.kubernetes.io/instance=${this.clusterNameLabel}`, 'pods')
+             .then((tsPodList) => gen(tsPodList, this.clusterState))
+             .catch((err) => {
+                 // TODO: We might need to do more here. I think it's OK to just
+                 // log though. This only gets used to show slicer info through
+                 // the API. We wouldn't want to disrupt the cluster master
+                 // for rare failures to reach the k8s API.
+                 logError(this.logger, err, 'Error listing teraslice pods in k8s');
+             });
+     }
+     /**
+      * Return value indicates whether the cluster has enough workers to start
+      * an execution. It must be able to allocate a slicer and at least one
+      * worker.
+      * @return {boolean} Ok to create job?
+      */
+     readyForAllocation() {
+         // return _availableWorkers() >= 2;
+         // TODO: This will be addressed in the future, see:
+         //     https://github.com/terascope/teraslice/issues/744
+         return true;
+     }
+     /**
+      * Creates k8s Service and Job for the Teraslice Execution Controller
+      * (formerly slicer). This currently works by creating a service with a
+      * hostname that contains the exId in it listening on a well known port.
+      * The hostname and port are used later by the workers to contact this
+      * Execution Controller.
+      * @param {Object} execution Object containing execution details
+      * @return {Promise} [description]
+      */
+     async allocateSlicer(ex) {
+         const execution = cloneDeep(ex);
+         execution.slicer_port = 45680;
+         const exJobResource = new K8sJobResource(this.context.sysconfig.teraslice, execution, this.logger);
+         const exJob = exJobResource.resource;
+         this.logger.debug(exJob, 'execution allocating slicer');
+         const jobResult = await this.k8s.post(exJob);
+         if (!jobResult.metadata.uid) {
+             throw new Error('Required field uid missing from jobResult.metadata');
+         }
+         const exServiceResource = new K8sServiceResource(this.context.sysconfig.teraslice, execution, this.logger,
+             // Needed to create the deployment and service resource ownerReferences
+             jobResult.metadata.name, jobResult.metadata.uid);
+         const exService = exServiceResource.resource;
+         const serviceResult = await this.k8s.post(exService);
+         this.logger.debug(jobResult, 'k8s slicer job submitted');
+         const exServiceName = serviceResult.metadata.name;
+         const exServiceHostName = `${exServiceName}.${this.k8s.defaultNamespace}`;
+         this.logger.debug(`Slicer is using host name: ${exServiceHostName}`);
+         execution.slicer_hostname = `${exServiceHostName}`;
+         return execution;
+     }
+     /**
+      * Creates k8s deployment that executes Teraslice workers for specified
+      * Execution.
+      * @param {Object} execution Object that contains information of Execution
+      * @return {Promise} [description]
+      */
+     async allocateWorkers(execution) {
+         // NOTE: I tried to set these on the execution inside allocateSlicer
+         // but these properties were gone by the time this was called, perhaps
+         // because they are not on the schema. So I do this k8s API call
+         // instead.
+         const selector = `app.kubernetes.io/component=execution_controller,teraslice.terascope.io/jobId=${execution.job_id}`;
+         const jobs = await pRetry(() => this.k8s.nonEmptyJobList(selector), getRetryConfig());
+         if (!jobs.items[0].metadata.uid) {
+             throw new Error('Required field uid missing from kubernetes job metadata');
+         }
+         const kr = new K8sDeploymentResource(this.context.sysconfig.teraslice, execution, this.logger, jobs.items[0].metadata.name, jobs.items[0].metadata.uid);
+         const workerDeployment = kr.resource;
+         this.logger.debug(`workerDeployment:\n\n${JSON.stringify(workerDeployment, null, 2)}`);
+         return this.k8s.post(workerDeployment)
+             .then((result) => this.logger.debug(`k8s worker deployment submitted: ${JSON.stringify(result)}`))
+             .catch((err) => {
+                 const error = new TSError(err, {
+                     reason: 'Error submitting k8s worker deployment'
+                 });
+                 return Promise.reject(error);
+             });
+     }
+     // FIXME: These functions should probably do something with the response
+     // NOTE: I find it strange that the expected return value here is
+     // effectively the same as the function inputs
+     async addWorkers(executionContext, numWorkers) {
+         await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'add');
+         return { action: 'add', ex_id: executionContext.ex_id, workerNum: numWorkers };
+     }
+     // NOTE: This is passed exId instead of executionContext like addWorkers and
+     // removeWorkers. I don't know why, just dealing with it.
+     async removeWorkers(exId, numWorkers) {
+         await this.k8s.scaleExecution(exId, numWorkers, 'remove');
+         return { action: 'remove', ex_id: exId, workerNum: numWorkers };
+     }
+     // TODO: fix types here
+     async setWorkers(executionContext, numWorkers) {
+         await this.k8s.scaleExecution(executionContext.ex_id, numWorkers, 'set');
+         return { action: 'set', ex_id: executionContext.ex_id, workerNum: numWorkers };
+     }
+     /**
+      * Stops all workers for exId
+      * @param {String} exId The execution ID of the Execution to stop
+      * @param {StopExecutionOptions} options force, timeout, and excludeNode
+      *     force: stop all related pod, deployment, and job resources
+      *     timeout and excludeNode are not used in k8s clustering.
+      * @return {Promise}
+      */
+     async stopExecution(exId, options) {
+         return this.k8s.deleteExecution(exId, options?.force);
+     }
+     async shutdown() {
+         clearInterval(this.clusterStateInterval);
+     }
+     /**
+      * Returns a list of all k8s resources associated with a job ID
+      * @param {string} jobId The job ID of the job to list associated resources
+      * @returns {Array<TSPod[] | TSDeployment[] | TSService[]
+      *     | TSJob[] | TSReplicaSet[]>}
+      */
+     async listResourcesForJobId(jobId) {
+         const resources = [];
+         const resourceTypes = ['pods', 'deployments', 'services', 'jobs', 'replicasets'];
+         for (const type of resourceTypes) {
+             const list = await this.k8s.list(`teraslice.terascope.io/jobId=${jobId}`, type);
+             if (list.items.length > 0) {
+                 resources.push(list.items);
+             }
+         }
+         return resources;
+     }
+     async initialize() {
+         this.logger.info('kubernetesV2 clustering initializing');
+         // Periodically update cluster state, update period controlled by:
+         //     context.sysconfig.teraslice.node_state_interval
+         this.clusterStateInterval = setInterval(() => {
+             this.logger.trace('cluster_master requesting cluster state update.');
+             this._getClusterState();
+         }, this.context.sysconfig.teraslice.node_state_interval);
+     }
+ }
+ //# sourceMappingURL=index.js.map
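The V2 backend above keeps the original kubernetes backend's two-phase allocation: allocateSlicer posts the execution-controller Job plus its Service, and allocateWorkers later looks that Job back up (via nonEmptyJobList) so the worker Deployment can reference it as owner. A minimal sketch of how a caller might drive it (the startExecution wrapper and its arguments are illustrative stand-ins, not the actual cluster_master wiring):

    import { KubernetesClusterBackendV2 } from './index.js';

    // Hypothetical driver; the real wiring lives in cluster_master.js.
    async function startExecution(context, clusterMasterServer, ex) {
        const backend = new KubernetesClusterBackendV2(context, clusterMasterServer);
        await backend.initialize(); // starts the node_state_interval polling loop

        // Phase 1: create the execution-controller Job and Service; the returned
        // copy of the execution carries slicer_hostname and slicer_port.
        const execution = await backend.allocateSlicer(ex);

        // Phase 2: create the worker Deployment, owned by the Job from phase 1.
        await backend.allocateWorkers(execution);
        return execution;
    }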
package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/interfaces.js
@@ -0,0 +1,2 @@
+ export {};
+ //# sourceMappingURL=interfaces.js.map
package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.js
@@ -0,0 +1,423 @@
+ import { TSError, get, isEmpty, pDelay, pRetry } from '@terascope/utils';
+ import * as k8s from '@kubernetes/client-node';
+ import { convertToTSResource, convertToTSResourceList, getRetryConfig, isDeployment, isJob, isPod, isReplicaSet, isService, isTSPod } from './utils.js';
+ export class K8s {
+     logger;
+     apiPollDelay;
+     defaultNamespace;
+     shutdownTimeout;
+     kc;
+     k8sAppsV1Api;
+     k8sBatchV1Api;
+     k8sCoreV1Api;
+     constructor(logger, clientConfig, defaultNamespace, apiPollDelay, shutdownTimeout) {
+         this.apiPollDelay = apiPollDelay;
+         this.defaultNamespace = defaultNamespace || 'default';
+         this.logger = logger;
+         this.shutdownTimeout = shutdownTimeout; // this is in milliseconds
+         this.kc = new k8s.KubeConfig();
+         if (clientConfig) {
+             this.kc.loadFromOptions(clientConfig);
+         }
+         else if (process.env.KUBERNETES_SERVICE_HOST && process.env.KUBERNETES_SERVICE_PORT) {
+             this.kc.loadFromCluster();
+         }
+         else {
+             this.kc.loadFromDefault();
+         }
+         this.k8sAppsV1Api = this.kc.makeApiClient(k8s.AppsV1Api);
+         this.k8sBatchV1Api = this.kc.makeApiClient(k8s.BatchV1Api);
+         this.k8sCoreV1Api = this.kc.makeApiClient(k8s.CoreV1Api);
+     }
+     /**
+      * Returns the k8s NamespaceList object
+      * @return {Promise} [description]
+      */
+     async getNamespaces() {
+         let namespaces;
+         try {
+             namespaces = await pRetry(() => this.k8sCoreV1Api.listNamespace(), getRetryConfig());
+         }
+         catch (err) {
+             const error = new TSError(err, {
+                 reason: 'Failure getting namespaces'
+             });
+             throw error;
+         }
+         return namespaces.body;
+     }
+     /**
+      * Returns the first pod matching the provided selector after it has
+      * entered the `Running` state.
+      *
+      * TODO: Make more generic to search for different statuses
+      *
+      * NOTE: If your selector will return multiple pods, this method probably
+      * won't work for you.
+      * @param {String} selector kubernetes selector, like 'controller-uid=XXX'
+      * @param {String} ns namespace to search, this will override the default
+      * @param {Number} timeout time, in ms, to wait for pod to start
+      * @return {k8s.V1Pod} pod object
+      *
+      * TODO: Should this use the cluster state that gets polled periodically,
+      * rather than making its own k8s API calls?
+      */
+     async waitForSelectedPod(selector, statusType, ns, timeout = 10000) {
+         const namespace = ns || this.defaultNamespace;
+         let now = Date.now();
+         const end = now + timeout;
+         while (true) {
+             const result = await pRetry(() => this.k8sCoreV1Api
+                 .listNamespacedPod(namespace, undefined, undefined, undefined, undefined, selector), getRetryConfig());
+             // NOTE: This assumes the first pod returned.
+             const pod = get(result, 'body.items[0]');
+             if (pod && isTSPod(pod)) {
+                 if (statusType === 'readiness-probe') {
+                     if (pod.status.conditions) {
+                         for (const condition of pod.status.conditions) {
+                             if (condition.type === 'ContainersReady'
+                                 && condition.status === 'True') {
+                                 return pod;
+                             }
+                         }
+                     }
+                 }
+                 else if (statusType === 'pod-status') {
+                     if (get(pod, 'status.phase') === 'Running')
+                         return pod;
+                 }
+             }
+             if (now > end)
+                 throw new Error(`Timeout waiting for pod matching: ${selector}`);
+             this.logger.debug(`waiting for pod matching: ${selector}`);
+             await pDelay(this.apiPollDelay);
+             now = Date.now();
+         }
+     }
+     /**
+      * Waits for the number of pods to equal number.
+      * @param {Number} number Number of pods to wait for, e.g.: 0, 10
+      * @param {String} selector kubernetes selector, like 'controller-uid=XXX'
+      * @param {String} ns namespace to search, this will override the default
+      * @param {Number} timeout time, in ms, to wait for pod to start
+      * @return {k8s.V1Pod[]} Array of pod objects
+      *
+      * TODO: Should this use the cluster state that gets polled periodically,
+      * rather than making its own k8s API calls?
+      */
+     async waitForNumPods(number, selector, ns, timeout = 10000) {
+         const namespace = ns || this.defaultNamespace;
+         let now = Date.now();
+         const end = now + timeout;
+         while (true) {
+             const result = await pRetry(() => this.k8sCoreV1Api
+                 .listNamespacedPod(namespace, undefined, undefined, undefined, undefined, selector), getRetryConfig());
+             const podList = get(result, 'body.items');
+             if (podList.length === number)
+                 return podList;
+             const msg = `Waiting: pods matching ${selector} is ${podList.length}/${number}`;
+             if (now > end)
+                 throw new Error(`Timeout ${msg}`);
+             this.logger.debug(msg);
+             await pDelay(this.apiPollDelay);
+             now = Date.now();
+         }
+     }
+     async list(selector, objType, ns) {
+         const namespace = ns || this.defaultNamespace;
+         let responseObj;
+         const params = [
+             namespace,
+             undefined,
+             undefined,
+             undefined,
+             undefined,
+             selector
+         ];
+         try {
+             if (objType === 'deployments') {
+                 responseObj = await pRetry(() => this.k8sAppsV1Api.listNamespacedDeployment(...params), getRetryConfig());
+             }
+             else if (objType === 'jobs') {
+                 responseObj = await pRetry(() => this.k8sBatchV1Api.listNamespacedJob(...params), getRetryConfig());
+             }
+             else if (objType === 'pods') {
+                 responseObj = await pRetry(() => this.k8sCoreV1Api.listNamespacedPod(...params), getRetryConfig());
+             }
+             else if (objType === 'replicasets') {
+                 responseObj = await pRetry(() => this.k8sAppsV1Api.listNamespacedReplicaSet(...params), getRetryConfig());
+             }
+             else if (objType === 'services') {
+                 responseObj = await pRetry(() => this.k8sCoreV1Api.listNamespacedService(...params), getRetryConfig());
+             }
+             else {
+                 const error = new Error(`Invalid objType provided to get: ${objType}`);
+                 this.logger.error(error);
+                 return Promise.reject(error);
+             }
+             return convertToTSResourceList(responseObj.body);
+         }
+         catch (e) {
+             const err = new Error(`Request k8s.list of ${objType} with selector ${selector} failed: ${e}`);
+             this.logger.error(err);
+             return Promise.reject(err);
+         }
+     }
+     async nonEmptyJobList(selector) {
+         const jobs = await this.list(selector, 'jobs');
+         if (jobs.items.length === 1) {
+             return jobs;
+         }
+         else if (jobs.items.length === 0) {
+             const msg = `Teraslice job matching the following selector was not found: ${selector} (retriable)`;
+             this.logger.warn(msg);
+             throw new TSError(msg, { retryable: true });
+         }
+         else {
+             throw new TSError(`Unexpected number of Teraslice jobs matching the following selector: ${selector}`, {
+                 retryable: true
+             });
+         }
+     }
+     async post(manifest) {
+         let responseObj;
+         try {
+             if (isDeployment(manifest)) {
+                 responseObj = await this.k8sAppsV1Api
+                     .createNamespacedDeployment(this.defaultNamespace, manifest);
+             }
+             else if (isJob(manifest)) {
+                 responseObj = await this.k8sBatchV1Api
+                     .createNamespacedJob(this.defaultNamespace, manifest);
+             }
+             else if (isPod(manifest)) {
+                 responseObj = await this.k8sCoreV1Api
+                     .createNamespacedPod(this.defaultNamespace, manifest);
+             }
+             else if (isReplicaSet(manifest)) {
+                 responseObj = await this.k8sAppsV1Api
+                     .createNamespacedReplicaSet(this.defaultNamespace, manifest);
+             }
+             else if (isService(manifest)) {
+                 responseObj = await this.k8sCoreV1Api
+                     .createNamespacedService(this.defaultNamespace, manifest);
+             }
+             else {
+                 const error = new Error('Invalid manifest type');
+                 return Promise.reject(error);
+             }
+             return convertToTSResource(responseObj.body);
+         }
+         catch (e) {
+             const err = new Error(`Request k8s.post of ${manifest.kind} with body ${JSON.stringify(manifest)} failed: ${e}`);
+             return Promise.reject(err);
+         }
+     }
+     /**
+      * Patches specified k8s deployment with the provided record
+      * @param {Object[]} record JSON patch operations to apply
+      * @param {String} name Name of the deployment to patch
+      * @return {Object} body of k8s patch response.
+      */
+     // TODO: I renamed this from patchDeployment to just patch because this is
+     // the low level k8s api method, I expect to eventually change the interface
+     // on this to require `objType` to support patching other things
+     async patch(record, name) {
+         let responseObj;
+         try {
+             const options = { headers: { 'Content-type': k8s.PatchUtils.PATCH_FORMAT_JSON_PATCH } };
+             responseObj = await pRetry(() => this.k8sAppsV1Api
+                 .patchNamespacedDeployment(name, this.defaultNamespace, record, undefined, undefined, undefined, undefined, undefined, options), getRetryConfig());
+             return responseObj.body;
+         }
+         catch (e) {
+             const err = new Error(`Request k8s.patch with name: ${name} failed with: ${e}`);
+             this.logger.error(err);
+             return Promise.reject(err);
+         }
+     }
+     async delete(name, objType, force) {
+         if (name === undefined || name.trim() === '') {
+             throw new Error(`Name of resource to delete must be specified. Received: "${name}".`);
+         }
+         let responseObj;
+         // To get a Job to remove the associated pods you have to
+         // include a body like the one below with the delete request.
+         // To force: setting gracePeriodSeconds to 1 will send a SIGKILL command to the resource
+         const deleteOptions = {
+             apiVersion: 'v1',
+             kind: 'DeleteOptions',
+             propagationPolicy: 'Background'
+         };
+         if (force) {
+             deleteOptions.gracePeriodSeconds = 1;
+         }
+         const params = [
+             name,
+             this.defaultNamespace,
+             undefined,
+             undefined,
+             undefined,
+             undefined,
+             undefined,
+             deleteOptions
+         ];
+         const deleteWithErrorHandling = async (deleteFn) => {
+             try {
+                 const res = await deleteFn();
+                 return res;
+             }
+             catch (e) {
+                 if (e.statusCode) {
+                     // 404 should be an acceptable response to a delete request, not an error
+                     if (e.statusCode === 404) {
+                         this.logger.info(`No ${objType} with name ${name} found while attempting to delete.`);
+                         return e;
+                     }
+                 }
+                 throw e;
+             }
+         };
+         try {
+             if (objType === 'services') {
+                 responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sCoreV1Api
+                     .deleteNamespacedService(...params)), getRetryConfig());
+             }
+             else if (objType === 'deployments') {
+                 responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sAppsV1Api
+                     .deleteNamespacedDeployment(...params)), getRetryConfig());
+             }
+             else if (objType === 'jobs') {
+                 responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sBatchV1Api
+                     .deleteNamespacedJob(...params)), getRetryConfig());
+             }
+             else if (objType === 'pods') {
+                 responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sCoreV1Api
+                     .deleteNamespacedPod(...params)), getRetryConfig());
+             }
+             else if (objType === 'replicasets') {
+                 responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sAppsV1Api
+                     .deleteNamespacedReplicaSet(...params)), getRetryConfig());
+             }
+             else {
+                 throw new Error(`Invalid objType: ${objType}`);
+             }
+             return responseObj.body;
+         }
+         catch (e) {
+             const err = new Error(`Request k8s.delete with name: ${name} failed with: ${e}`);
+             this.logger.error(err);
+             return Promise.reject(err);
+         }
+     }
+     /**
+      * Deletes all Kubernetes resources related to the specified exId
+      * @param {String} exId ID of the execution
+      * @param {Boolean} force Forcefully stop all pod, deployment,
+      *     service, replicaset and job resources
+      * @return {Promise}
+      */
+     async deleteExecution(exId, force = false) {
+         if (!exId) {
+             throw new Error('deleteExecution requires an executionId');
+         }
+         if (force) {
+             // Order matters. If we delete a parent resource before its children it
+             // will be marked for background deletion and then can't be force deleted.
+             await this._deleteObjByExId(exId, 'worker', 'pods', force);
+             await this._deleteObjByExId(exId, 'worker', 'replicasets', force);
+             await this._deleteObjByExId(exId, 'worker', 'deployments', force);
+             await this._deleteObjByExId(exId, 'execution_controller', 'pods', force);
+             await this._deleteObjByExId(exId, 'execution_controller', 'services', force);
+         }
+         await this._deleteObjByExId(exId, 'execution_controller', 'jobs', force);
+     }
+     async _deleteObjByExId(exId, nodeType, objType, force) {
+         let objList;
+         const deleteResponses = [];
+         try {
+             objList = await this.list(`app.kubernetes.io/component=${nodeType},teraslice.terascope.io/exId=${exId}`, objType);
+         }
+         catch (e) {
+             const err = new Error(`Request ${objType} list in _deleteObjByExId with app.kubernetes.io/component: ${nodeType} and exId: ${exId} failed with: ${e}`);
+             this.logger.error(err);
+             return Promise.reject(err);
+         }
+         if (isEmpty(objList.items)) {
+             this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} has already been deleted`);
+             return Promise.resolve();
+         }
+         for (const obj of objList.items) {
+             const name = obj.metadata.name;
+             const deletionTimestamp = obj.metadata.deletionTimestamp;
+             // If deletionTimestamp is present then the resource is already terminating.
+             // K8s will not change the grace period in this case, so force deletion is not possible
+             if (force && deletionTimestamp) {
+                 this.logger.warn(`Cannot force delete ${name} for ExId: ${exId}. It will finish deleting gracefully by ${deletionTimestamp}`);
+                 return Promise.resolve();
+             }
+             this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} ${force ? 'force' : ''} deleting: ${name}`);
+             try {
+                 deleteResponses.push(await this.delete(name, objType, force));
+             }
+             catch (e) {
+                 const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${name} failed with: ${e}`);
+                 this.logger.error(err);
+                 return Promise.reject(err);
+             }
+         }
+         return deleteResponses;
+     }
+     /**
+      * Scales the k8s deployment for the specified exId to the desired number
+      * of workers.
+      * @param {String} exId exId of execution to scale
+      * @param {number} numWorkers number of workers to scale by
+      * @param {ScaleOp} op Scale operation: `set`, `add`, `remove`
+      * @return {Promise<k8s.V1Deployment>} Body of patch response.
+      */
+     async scaleExecution(exId, numWorkers, op) {
+         let newScale;
+         const selector = `app.kubernetes.io/component=worker,teraslice.terascope.io/exId=${exId}`;
+         this.logger.info(`Scaling exId: ${exId}, op: ${op}, numWorkers: ${numWorkers}`);
+         const listResponse = await this.list(selector, 'deployments');
+         this.logger.debug(`k8s worker query listResponse: ${JSON.stringify(listResponse)}`);
+         // the selector provided to list above should always result in a single
+         // deployment in the response.
+         if (listResponse.items.length === 0) {
+             const msg = `Teraslice deployment matching the following selector was not found: ${selector}`;
+             this.logger.warn(msg);
+             throw new TSError(msg);
+         }
+         else if (listResponse.items.length > 1) {
+             throw new TSError(`Unexpected number of Teraslice deployments matching the following selector: ${selector}`);
+         }
+         const workerDeployment = listResponse.items[0];
+         this.logger.info(`Current Scale for exId=${exId}: ${workerDeployment.spec.replicas}`);
+         if (op === 'set') {
+             newScale = numWorkers;
+         }
+         else if (op === 'add') {
+             newScale = workerDeployment.spec.replicas + numWorkers;
+         }
+         else if (op === 'remove') {
+             newScale = workerDeployment.spec.replicas - numWorkers;
+         }
+         else {
+             throw new Error('scaleExecution only accepts the following operations: add, remove, set');
+         }
+         this.logger.info(`New Scale for exId=${exId}: ${newScale}`);
+         const scalePatch = [
+             {
+                 op: 'replace',
+                 path: '/spec/replicas',
+                 value: newScale
+             }
+         ];
+         const patchResponseBody = await this
+             .patch(scalePatch, workerDeployment.metadata.name);
+         this.logger.debug(`k8s.scaleExecution patchResponseBody: ${JSON.stringify(patchResponseBody)}`);
+         return patchResponseBody;
+     }
+ }
+ //# sourceMappingURL=k8s.js.map
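Two patterns in this client are worth noting: reads, patches, and deletes are wrapped in pRetry(..., getRetryConfig()), and scaling is a single JSON-Patch replace of /spec/replicas sent with the PATCH_FORMAT_JSON_PATCH content type. A rough standalone sketch of the equivalent raw call, mirroring the argument shape patch() uses above (the deployment name, namespace, and replica count are illustrative):

    import * as k8s from '@kubernetes/client-node';

    const kc = new k8s.KubeConfig();
    kc.loadFromDefault();
    const apps = kc.makeApiClient(k8s.AppsV1Api);

    // Same JSON-Patch body that scaleExecution builds for op === 'set'.
    const scalePatch = [{ op: 'replace', path: '/spec/replicas', value: 5 }];
    const options = { headers: { 'Content-type': k8s.PatchUtils.PATCH_FORMAT_JSON_PATCH } };

    // The positional undefineds cover the client's optional pretty/dryRun/
    // fieldManager/etc. parameters, exactly as in patch() above.
    const res = await apps.patchNamespacedDeployment(
        'ts-wkr-example-deployment', 'default', scalePatch,
        undefined, undefined, undefined, undefined, undefined, options
    );
    console.log(res.body.spec.replicas); // => 5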
package/dist/src/lib/cluster/services/cluster/backends/kubernetesV2/k8sDeploymentResource.js
@@ -0,0 +1,60 @@
+ import { V1Deployment } from '@kubernetes/client-node';
+ import { convertToTSResource, makeTemplate } from './utils.js';
+ import { K8sResource } from './k8sResource.js';
+ export class K8sDeploymentResource extends K8sResource {
+     nodeType = 'worker';
+     nameInfix = 'wkr';
+     templateGenerator;
+     templateConfig;
+     resource;
+     exName;
+     exUid;
+     /**
+      * K8sDeploymentResource allows the generation of a k8s deployment based on a template.
+      * After creating the object, the k8s deployment is accessible on the object's
+      * .resource property.
+      *
+      * @param {Object} terasliceConfig - teraslice cluster config from context
+      * @param {Object} execution - teraslice execution
+      * @param {Logger} logger - teraslice logger
+      * @param {String} exName - name from execution resource
+      * @param {String} exUid - uid from execution resource
+      */
+     constructor(terasliceConfig, execution, logger, exName, exUid) {
+         super(terasliceConfig, execution, logger);
+         this.execution = execution;
+         this.logger = logger;
+         this.terasliceConfig = terasliceConfig;
+         this.exName = exName;
+         this.exUid = exUid;
+         this.templateGenerator = makeTemplate('deployments', this.nodeType);
+         this.templateConfig = this._makeConfig(this.nameInfix, exName, exUid);
+         const k8sDeployment = new V1Deployment();
+         Object.assign(k8sDeployment, this.templateGenerator(this.templateConfig));
+         this.resource = convertToTSResource(k8sDeployment);
+         this._setJobLabels(this.resource);
+         // Apply job `targets` setting as k8s nodeAffinity
+         // We assume that when multiple targets are given, all of them must match ...
+         // NOTE: If you specify multiple `matchExpressions` associated with
+         // `nodeSelectorTerms`, then the pod can be scheduled onto a node
+         // only if *all* `matchExpressions` can be satisfied.
+         this._setTargets(this.resource);
+         this._setResources(this.resource);
+         this._setVolumes(this.resource);
+         if (process.env.MOUNT_LOCAL_TERASLICE !== undefined) {
+             this._mountLocalTeraslice(this.resource);
+         }
+         this._setEnvVariables();
+         this._setAssetsVolume(this.resource);
+         this._setImagePullSecret(this.resource);
+         this._setEphemeralStorage(this.resource);
+         this._setExternalPorts(this.resource);
+         this._setPriorityClassName(this.resource);
+         this._setWorkerAntiAffinity(this.resource);
+         // override must happen last
+         if (this.terasliceConfig.kubernetes_overrides_enabled) {
+             this._mergePodSpecOverlay(this.resource);
+         }
+     }
+ }
+ //# sourceMappingURL=k8sDeploymentResource.js.map
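Note the construction pattern shared by the V2 resource classes: the constructor renders a template into a typed client object (V1Deployment here), then runs an ordered chain of _set* mutators inherited from K8sResource, with _mergePodSpecOverlay deliberately last so operator-supplied overrides win. A hedged usage sketch, reusing names from allocateWorkers above (the surrounding context, execution, logger, and jobResult variables are assumed to be populated):

    import { K8sDeploymentResource } from './k8sDeploymentResource.js';

    // exName/exUid come from the execution-controller Job created earlier; they
    // become ownerReferences, so k8s garbage-collects the worker Deployment
    // when that Job is deleted.
    const kr = new K8sDeploymentResource(
        context.sysconfig.teraslice, execution, logger,
        jobResult.metadata.name, jobResult.metadata.uid
    );
    const workerDeployment = kr.resource; // ready to pass to k8s.post()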