@kapeta/local-cluster-service 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/definitions.d.ts +1 -1
- package/dist/cjs/index.js +3 -3
- package/dist/cjs/src/assetManager.js +7 -4
- package/dist/cjs/src/attachments/routes.js +4 -4
- package/dist/cjs/src/clusterService.js +2 -0
- package/dist/cjs/src/codeGeneratorManager.js +3 -3
- package/dist/cjs/src/config/routes.js +1 -1
- package/dist/cjs/src/configManager.js +13 -1
- package/dist/cjs/src/containerManager.d.ts +22 -2
- package/dist/cjs/src/containerManager.js +42 -15
- package/dist/cjs/src/definitionsManager.d.ts +11 -0
- package/dist/cjs/src/definitionsManager.js +44 -0
- package/dist/cjs/src/filesystemManager.js +0 -2
- package/dist/cjs/src/instanceManager.d.ts +23 -47
- package/dist/cjs/src/instanceManager.js +416 -235
- package/dist/cjs/src/instances/routes.js +23 -14
- package/dist/cjs/src/middleware/kapeta.js +7 -0
- package/dist/cjs/src/networkManager.js +6 -0
- package/dist/cjs/src/operatorManager.js +8 -4
- package/dist/cjs/src/providerManager.js +3 -3
- package/dist/cjs/src/repositoryManager.js +7 -3
- package/dist/cjs/src/serviceManager.js +5 -0
- package/dist/cjs/src/types.d.ts +39 -13
- package/dist/cjs/src/types.js +28 -0
- package/dist/cjs/src/utils/BlockInstanceRunner.d.ts +3 -3
- package/dist/cjs/src/utils/BlockInstanceRunner.js +28 -29
- package/dist/cjs/src/utils/utils.d.ts +2 -0
- package/dist/cjs/src/utils/utils.js +18 -2
- package/dist/esm/index.js +4 -4
- package/dist/esm/src/assetManager.js +7 -4
- package/dist/esm/src/attachments/routes.js +5 -5
- package/dist/esm/src/clusterService.js +2 -0
- package/dist/esm/src/codeGeneratorManager.js +3 -3
- package/dist/esm/src/config/routes.js +1 -1
- package/dist/esm/src/configManager.js +13 -1
- package/dist/esm/src/containerManager.d.ts +22 -2
- package/dist/esm/src/containerManager.js +41 -14
- package/dist/esm/src/definitionsManager.d.ts +11 -0
- package/dist/esm/src/definitionsManager.js +38 -0
- package/dist/esm/src/filesystemManager.js +0 -2
- package/dist/esm/src/instanceManager.d.ts +23 -47
- package/dist/esm/src/instanceManager.js +416 -236
- package/dist/esm/src/instances/routes.js +23 -14
- package/dist/esm/src/middleware/kapeta.js +7 -0
- package/dist/esm/src/networkManager.js +6 -0
- package/dist/esm/src/operatorManager.js +8 -4
- package/dist/esm/src/providerManager.js +3 -3
- package/dist/esm/src/repositoryManager.js +7 -3
- package/dist/esm/src/serviceManager.js +5 -0
- package/dist/esm/src/types.d.ts +39 -13
- package/dist/esm/src/types.js +27 -1
- package/dist/esm/src/utils/BlockInstanceRunner.d.ts +3 -3
- package/dist/esm/src/utils/BlockInstanceRunner.js +29 -30
- package/dist/esm/src/utils/utils.d.ts +2 -0
- package/dist/esm/src/utils/utils.js +15 -1
- package/index.ts +10 -8
- package/package.json +2 -1
- package/src/assetManager.ts +7 -4
- package/src/attachments/routes.ts +8 -8
- package/src/clusterService.ts +3 -0
- package/src/codeGeneratorManager.ts +3 -2
- package/src/config/routes.ts +1 -1
- package/src/configManager.ts +13 -1
- package/src/containerManager.ts +63 -16
- package/src/definitionsManager.ts +54 -0
- package/src/filesystemManager.ts +0 -2
- package/src/identities/routes.ts +2 -3
- package/src/instanceManager.ts +495 -266
- package/src/instances/routes.ts +23 -17
- package/src/middleware/kapeta.ts +10 -0
- package/src/networkManager.ts +6 -0
- package/src/operatorManager.ts +11 -6
- package/src/providerManager.ts +3 -2
- package/src/repositoryManager.ts +14 -10
- package/src/serviceManager.ts +6 -0
- package/src/storageService.ts +1 -1
- package/src/types.ts +44 -14
- package/src/utils/BlockInstanceRunner.ts +34 -34
- package/src/utils/utils.ts +20 -2
package/src/instanceManager.ts
CHANGED
@@ -1,204 +1,153 @@
|
|
1
1
|
import _ from 'lodash';
|
2
2
|
import request from 'request';
|
3
|
-
import EventEmitter from 'events';
|
4
3
|
import { BlockInstanceRunner } from './utils/BlockInstanceRunner';
|
5
4
|
import { storageService } from './storageService';
|
6
5
|
import { socketManager } from './socketManager';
|
7
6
|
import { serviceManager } from './serviceManager';
|
8
7
|
import { assetManager } from './assetManager';
|
9
|
-
import { containerManager } from './containerManager';
|
8
|
+
import { containerManager, HEALTH_CHECK_TIMEOUT } from './containerManager';
|
10
9
|
import { configManager } from './configManager';
|
11
|
-
import { InstanceInfo,
|
10
|
+
import { DesiredInstanceStatus, InstanceInfo, InstanceOwner, InstanceStatus, InstanceType, LogEntry } from './types';
|
12
11
|
import { BlockInstance } from '@kapeta/schemas';
|
12
|
+
import { getBlockInstanceContainerName, normalizeKapetaUri } from './utils/utils';
|
13
13
|
|
14
|
-
const CHECK_INTERVAL =
|
14
|
+
const CHECK_INTERVAL = 5000;
|
15
15
|
const DEFAULT_HEALTH_PORT_TYPE = 'rest';
|
16
16
|
|
17
17
|
const EVENT_STATUS_CHANGED = 'status-changed';
|
18
18
|
const EVENT_INSTANCE_CREATED = 'instance-created';
|
19
19
|
const EVENT_INSTANCE_EXITED = 'instance-exited';
|
20
20
|
const EVENT_INSTANCE_LOG = 'instance-log';
|
21
|
-
|
22
|
-
const STATUS_STARTING = 'starting';
|
23
|
-
const STATUS_READY = 'ready';
|
24
|
-
const STATUS_UNHEALTHY = 'unhealthy';
|
25
|
-
const STATUS_STOPPED = 'stopped';
|
26
|
-
|
27
21
|
const MIN_TIME_RUNNING = 30000; //If something didnt run for more than 30 secs - it failed
|
28
22
|
|
29
|
-
class InstanceManager {
|
30
|
-
private _interval: NodeJS.Timer;
|
31
|
-
|
32
|
-
/**
|
33
|
-
* Contains an array of running instances that have self-registered with this
|
34
|
-
* cluster service. This is done by the Kapeta SDKs
|
35
|
-
*/
|
36
|
-
private _instances: InstanceInfo[] = [];
|
23
|
+
export class InstanceManager {
|
24
|
+
private _interval: NodeJS.Timer | undefined = undefined;
|
37
25
|
|
38
|
-
|
39
|
-
* Contains the process info for the instances started by this manager. In memory only
|
40
|
-
* so can't be relied on for knowing everything that's running.
|
41
|
-
*
|
42
|
-
*/
|
43
|
-
private _processes: { [systemId: string]: { [instanceId: string]: ProcessInfo } } = {};
|
26
|
+
private readonly _instances: InstanceInfo[] = [];
|
44
27
|
|
45
28
|
constructor() {
|
46
|
-
this._interval = setInterval(() => this._checkInstances(), CHECK_INTERVAL);
|
47
29
|
this._instances = storageService.section('instances', []);
|
48
|
-
this._processes = {};
|
49
|
-
|
50
|
-
this._checkInstances();
|
51
|
-
}
|
52
30
|
|
53
|
-
|
54
|
-
|
31
|
+
// We need to wait a bit before running the first check
|
32
|
+
this.checkInstancesLater(1000);
|
55
33
|
}
|
56
34
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
const newStatus = await this._getInstanceStatus(instance);
|
35
|
+
private checkInstancesLater(time = CHECK_INTERVAL) {
|
36
|
+
if (this._interval) {
|
37
|
+
clearTimeout(this._interval);
|
38
|
+
}
|
63
39
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
40
|
+
this._interval = setTimeout(async () => {
|
41
|
+
await this.checkInstances();
|
42
|
+
this.checkInstancesLater();
|
43
|
+
}, time);
|
44
|
+
}
|
69
45
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
'Instance status changed: %s %s -> %s',
|
74
|
-
instance.systemId,
|
75
|
-
instance.instanceId,
|
76
|
-
instance.status
|
77
|
-
);
|
78
|
-
this._emit(instance.systemId, EVENT_STATUS_CHANGED, instance);
|
79
|
-
changed = true;
|
80
|
-
}
|
46
|
+
public getInstances() {
|
47
|
+
if (!this._instances) {
|
48
|
+
return [];
|
81
49
|
}
|
82
50
|
|
83
|
-
|
84
|
-
this._save();
|
85
|
-
}
|
51
|
+
return [...this._instances];
|
86
52
|
}
|
87
53
|
|
88
|
-
|
89
|
-
if (!
|
90
|
-
return;
|
54
|
+
public getInstancesForPlan(systemId: string) {
|
55
|
+
if (!this._instances) {
|
56
|
+
return [];
|
91
57
|
}
|
92
58
|
|
93
|
-
|
94
|
-
const container = await containerManager.get(instance.pid as string);
|
95
|
-
if (!container) {
|
96
|
-
console.warn('Container not found: %s', instance.pid);
|
97
|
-
return false;
|
98
|
-
}
|
99
|
-
return await container.isRunning();
|
100
|
-
}
|
59
|
+
systemId = normalizeKapetaUri(systemId);
|
101
60
|
|
102
|
-
|
103
|
-
//TODO: Handle for Windows
|
104
|
-
try {
|
105
|
-
return process.kill(instance.pid as number, 0);
|
106
|
-
} catch (err: any) {
|
107
|
-
return err.code === 'EPERM';
|
108
|
-
}
|
61
|
+
return this._instances.filter((instance) => instance.systemId === systemId);
|
109
62
|
}
|
110
63
|
|
111
|
-
|
112
|
-
|
113
|
-
//Will only change when it reregisters
|
114
|
-
return STATUS_STOPPED;
|
115
|
-
}
|
64
|
+
public getInstance(systemId: string, instanceId: string) {
|
65
|
+
systemId = normalizeKapetaUri(systemId);
|
116
66
|
|
117
|
-
|
118
|
-
|
119
|
-
}
|
67
|
+
return this._instances.find((i) => i.systemId === systemId && i.instanceId === instanceId);
|
68
|
+
}
|
120
69
|
|
121
|
-
|
122
|
-
|
123
|
-
|
70
|
+
public async saveInternalInstance(instance: InstanceInfo) {
|
71
|
+
instance.systemId = normalizeKapetaUri(instance.systemId);
|
72
|
+
if (instance.ref) {
|
73
|
+
instance.ref = normalizeKapetaUri(instance.ref);
|
124
74
|
}
|
125
75
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
if (err) {
|
133
|
-
resolve(STATUS_UNHEALTHY);
|
134
|
-
return;
|
135
|
-
}
|
136
|
-
|
137
|
-
if (response.statusCode > 399) {
|
138
|
-
resolve(STATUS_UNHEALTHY);
|
139
|
-
return;
|
140
|
-
}
|
76
|
+
//Get target address
|
77
|
+
let address = await serviceManager.getProviderAddress(
|
78
|
+
instance.systemId,
|
79
|
+
instance.instanceId,
|
80
|
+
instance.portType ?? DEFAULT_HEALTH_PORT_TYPE
|
81
|
+
);
|
141
82
|
|
142
|
-
|
143
|
-
});
|
144
|
-
});
|
145
|
-
}
|
83
|
+
const healthUrl = this.getHealthUrl(instance, address);
|
146
84
|
|
147
|
-
|
148
|
-
if (
|
149
|
-
|
85
|
+
instance.address = address;
|
86
|
+
if (healthUrl) {
|
87
|
+
instance.health = healthUrl;
|
150
88
|
}
|
151
89
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
90
|
+
let existingInstance = this.getInstance(instance.systemId, instance.instanceId);
|
91
|
+
if (existingInstance) {
|
92
|
+
const ix = this._instances.indexOf(existingInstance);
|
93
|
+
this._instances.splice(ix, 1, instance);
|
94
|
+
this.emitSystemEvent(instance.systemId, EVENT_STATUS_CHANGED, instance);
|
95
|
+
} else {
|
96
|
+
this._instances.push(instance);
|
97
|
+
this.emitSystemEvent(instance.systemId, EVENT_INSTANCE_CREATED, instance);
|
158
98
|
}
|
159
99
|
|
160
|
-
|
161
|
-
}
|
100
|
+
this.save();
|
162
101
|
|
163
|
-
|
164
|
-
* Get instance information
|
165
|
-
*
|
166
|
-
* @param {string} systemId
|
167
|
-
* @param {string} instanceId
|
168
|
-
* @return {*}
|
169
|
-
*/
|
170
|
-
getInstance(systemId: string, instanceId: string) {
|
171
|
-
return _.find(this._instances, { systemId, instanceId });
|
102
|
+
return instance;
|
172
103
|
}
|
173
104
|
|
174
105
|
/**
|
175
|
-
*
|
176
|
-
*
|
177
|
-
* @param {string} instanceId
|
178
|
-
* @param {InstanceInfo} info
|
179
|
-
* @return {Promise<void>}
|
106
|
+
* Method is called when instance is started from the Kapeta SDKs (e.g. NodeJS SDK)
|
107
|
+
* which self-registers with the cluster service locally on startup.
|
180
108
|
*/
|
181
|
-
async
|
109
|
+
public async registerInstanceFromSDK(
|
110
|
+
systemId: string,
|
111
|
+
instanceId: string,
|
112
|
+
info: Omit<InstanceInfo, 'systemId' | 'instanceId'>
|
113
|
+
) {
|
114
|
+
systemId = normalizeKapetaUri(systemId);
|
115
|
+
|
182
116
|
let instance = this.getInstance(systemId, instanceId);
|
183
117
|
|
184
118
|
//Get target address
|
185
|
-
|
119
|
+
const address = await serviceManager.getProviderAddress(
|
186
120
|
systemId,
|
187
121
|
instanceId,
|
188
122
|
info.portType ?? DEFAULT_HEALTH_PORT_TYPE
|
189
123
|
);
|
190
124
|
|
191
|
-
|
192
|
-
let health = info.health;
|
193
|
-
if (health) {
|
194
|
-
if (health.startsWith('/')) {
|
195
|
-
health = health.substring(1);
|
196
|
-
}
|
197
|
-
healthUrl = address + health;
|
198
|
-
}
|
125
|
+
const healthUrl = this.getHealthUrl(info, address);
|
199
126
|
|
200
127
|
if (instance) {
|
201
|
-
instance.status
|
128
|
+
if (instance.status === InstanceStatus.STOPPING && instance.desiredStatus === DesiredInstanceStatus.STOP) {
|
129
|
+
//If instance is stopping do not interfere
|
130
|
+
return;
|
131
|
+
}
|
132
|
+
|
133
|
+
if (info.owner === InstanceOwner.EXTERNAL) {
|
134
|
+
//If instance was started externally - then we want to replace the internal instance with that
|
135
|
+
if (
|
136
|
+
instance.owner === InstanceOwner.INTERNAL &&
|
137
|
+
(instance.status === InstanceStatus.READY ||
|
138
|
+
instance.status === InstanceStatus.STARTING ||
|
139
|
+
instance.status === InstanceStatus.UNHEALTHY)
|
140
|
+
) {
|
141
|
+
throw new Error(`Instance ${instanceId} is already running`);
|
142
|
+
}
|
143
|
+
|
144
|
+
instance.desiredStatus = info.desiredStatus;
|
145
|
+
instance.owner = info.owner;
|
146
|
+
instance.internal = undefined;
|
147
|
+
instance.status = InstanceStatus.STARTING;
|
148
|
+
instance.startedAt = Date.now();
|
149
|
+
}
|
150
|
+
|
202
151
|
instance.pid = info.pid;
|
203
152
|
instance.address = address;
|
204
153
|
if (info.type) {
|
@@ -207,63 +156,73 @@ class InstanceManager {
|
|
207
156
|
if (healthUrl) {
|
208
157
|
instance.health = healthUrl;
|
209
158
|
}
|
210
|
-
|
159
|
+
|
160
|
+
this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
|
211
161
|
} else {
|
162
|
+
//If instance was not found - then we're receiving an externally started instance
|
212
163
|
instance = {
|
164
|
+
...info,
|
213
165
|
systemId,
|
214
166
|
instanceId,
|
215
|
-
status:
|
216
|
-
|
217
|
-
|
167
|
+
status: InstanceStatus.STARTING,
|
168
|
+
startedAt: Date.now(),
|
169
|
+
desiredStatus: DesiredInstanceStatus.EXTERNAL,
|
170
|
+
owner: InstanceOwner.EXTERNAL,
|
218
171
|
health: healthUrl,
|
219
172
|
address,
|
220
173
|
};
|
221
174
|
|
222
175
|
this._instances.push(instance);
|
223
176
|
|
224
|
-
this.
|
177
|
+
this.emitSystemEvent(systemId, EVENT_INSTANCE_CREATED, instance);
|
225
178
|
}
|
226
179
|
|
227
|
-
this.
|
180
|
+
this.save();
|
181
|
+
|
182
|
+
return instance;
|
228
183
|
}
|
229
184
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
185
|
+
private getHealthUrl(info: Omit<InstanceInfo, 'systemId' | 'instanceId'>, address: string) {
|
186
|
+
let healthUrl = null;
|
187
|
+
let health = info.health;
|
188
|
+
if (health) {
|
189
|
+
if (health.startsWith('/')) {
|
190
|
+
health = health.substring(1);
|
191
|
+
}
|
192
|
+
healthUrl = address + health;
|
238
193
|
}
|
194
|
+
return healthUrl;
|
239
195
|
}
|
240
196
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
197
|
+
public markAsStopped(systemId: string, instanceId: string) {
|
198
|
+
systemId = normalizeKapetaUri(systemId);
|
199
|
+
const instance = _.find(this._instances, { systemId, instanceId });
|
200
|
+
if (instance && instance.owner === InstanceOwner.EXTERNAL && instance.status !== InstanceStatus.STOPPED) {
|
201
|
+
instance.status = InstanceStatus.STOPPED;
|
202
|
+
instance.pid = null;
|
203
|
+
instance.health = null;
|
204
|
+
this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
|
205
|
+
this.save();
|
246
206
|
}
|
247
207
|
}
|
248
208
|
|
249
|
-
async
|
250
|
-
|
251
|
-
|
252
|
-
const plan = await assetManager.getPlan(planRef, true);
|
209
|
+
public async startAllForPlan(systemId: string): Promise<InstanceInfo[]> {
|
210
|
+
systemId = normalizeKapetaUri(systemId);
|
211
|
+
const plan = await assetManager.getPlan(systemId, true);
|
253
212
|
if (!plan) {
|
254
|
-
throw new Error('Plan not found: ' +
|
213
|
+
throw new Error('Plan not found: ' + systemId);
|
255
214
|
}
|
256
215
|
|
257
216
|
if (!plan.spec.blocks) {
|
258
|
-
console.warn('No blocks found in plan',
|
217
|
+
console.warn('No blocks found in plan', systemId);
|
259
218
|
return [];
|
260
219
|
}
|
261
220
|
|
262
|
-
let promises: Promise<
|
221
|
+
let promises: Promise<InstanceInfo>[] = [];
|
263
222
|
let errors = [];
|
264
223
|
for (let blockInstance of Object.values(plan.spec.blocks as BlockInstance[])) {
|
265
224
|
try {
|
266
|
-
promises.push(this.
|
225
|
+
promises.push(this.start(systemId, blockInstance.id));
|
267
226
|
} catch (e) {
|
268
227
|
errors.push(e);
|
269
228
|
}
|
@@ -275,64 +234,76 @@ class InstanceManager {
|
|
275
234
|
throw errors[0];
|
276
235
|
}
|
277
236
|
|
278
|
-
return settled.map((p) => (p.status === 'fulfilled' ? p.value : null)).filter((p) => !!p) as
|
237
|
+
return settled.map((p) => (p.status === 'fulfilled' ? p.value : null)).filter((p) => !!p) as InstanceInfo[];
|
279
238
|
}
|
280
239
|
|
281
|
-
async
|
282
|
-
|
240
|
+
public async stop(systemId: string, instanceId: string) {
|
241
|
+
systemId = normalizeKapetaUri(systemId);
|
242
|
+
const instance = this.getInstance(systemId, instanceId);
|
243
|
+
if (!instance) {
|
283
244
|
return;
|
284
245
|
}
|
285
246
|
|
286
|
-
if (instance.status ===
|
247
|
+
if (instance.status === InstanceStatus.STOPPED) {
|
287
248
|
return;
|
288
249
|
}
|
289
250
|
|
251
|
+
if (instance.desiredStatus !== DesiredInstanceStatus.EXTERNAL) {
|
252
|
+
instance.desiredStatus = DesiredInstanceStatus.STOP;
|
253
|
+
}
|
254
|
+
|
255
|
+
instance.status = InstanceStatus.STOPPING;
|
256
|
+
|
257
|
+
this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
|
258
|
+
console.log('Stopping instance: %s::%s [desired: %s]', systemId, instanceId, instance.desiredStatus);
|
259
|
+
this.save();
|
260
|
+
|
290
261
|
try {
|
291
262
|
if (instance.type === 'docker') {
|
292
|
-
const
|
263
|
+
const containerName = getBlockInstanceContainerName(instance.instanceId);
|
264
|
+
const container = await containerManager.getContainerByName(containerName);
|
293
265
|
if (container) {
|
294
266
|
try {
|
295
267
|
await container.stop();
|
268
|
+
instance.status = InstanceStatus.STOPPED;
|
269
|
+
this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
|
270
|
+
this.save();
|
296
271
|
} catch (e) {
|
297
272
|
console.error('Failed to stop container', e);
|
298
273
|
}
|
274
|
+
} else {
|
275
|
+
console.warn('Container not found', containerName);
|
299
276
|
}
|
300
277
|
return;
|
301
278
|
}
|
279
|
+
|
280
|
+
if (!instance.pid) {
|
281
|
+
instance.status = InstanceStatus.STOPPED;
|
282
|
+
this.save();
|
283
|
+
return;
|
284
|
+
}
|
285
|
+
|
302
286
|
process.kill(instance.pid as number, 'SIGTERM');
|
287
|
+
instance.status = InstanceStatus.STOPPED;
|
288
|
+
this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
|
289
|
+
this.save();
|
303
290
|
} catch (e) {
|
304
291
|
console.error('Failed to stop process', e);
|
305
292
|
}
|
306
293
|
}
|
307
294
|
|
308
|
-
async stopAllForPlan(
|
309
|
-
|
310
|
-
|
311
|
-
console.log('Stopping all processes for plan', planRef);
|
312
|
-
for (let instance of Object.values(this._processes[planRef])) {
|
313
|
-
promises.push(instance.stop());
|
314
|
-
}
|
315
|
-
|
316
|
-
await Promise.all(promises);
|
295
|
+
public async stopAllForPlan(systemId: string) {
|
296
|
+
systemId = normalizeKapetaUri(systemId);
|
297
|
+
const instancesForPlan = this._instances.filter((instance) => instance.systemId === systemId);
|
317
298
|
|
318
|
-
|
319
|
-
}
|
320
|
-
|
321
|
-
//Also stop instances not being maintained by the cluster service
|
322
|
-
const instancesForPlan = this._instances.filter((instance) => instance.systemId === planRef);
|
323
|
-
|
324
|
-
const promises = [];
|
325
|
-
for (let instance of instancesForPlan) {
|
326
|
-
promises.push(this._stopInstance(instance));
|
327
|
-
}
|
328
|
-
|
329
|
-
await Promise.all(promises);
|
299
|
+
return this.stopInstances(instancesForPlan);
|
330
300
|
}
|
331
301
|
|
332
|
-
async
|
333
|
-
|
302
|
+
public async start(systemId: string, instanceId: string): Promise<InstanceInfo> {
|
303
|
+
systemId = normalizeKapetaUri(systemId);
|
304
|
+
const plan = await assetManager.getPlan(systemId, true);
|
334
305
|
if (!plan) {
|
335
|
-
throw new Error('Plan not found: ' +
|
306
|
+
throw new Error('Plan not found: ' + systemId);
|
336
307
|
}
|
337
308
|
|
338
309
|
const blockInstance = plan.spec && plan.spec.blocks ? _.find(plan.spec.blocks, { id: instanceId }) : null;
|
@@ -340,42 +311,84 @@ class InstanceManager {
|
|
340
311
|
throw new Error('Block instance not found: ' + instanceId);
|
341
312
|
}
|
342
313
|
|
343
|
-
const blockRef = blockInstance.block.ref;
|
314
|
+
const blockRef = normalizeKapetaUri(blockInstance.block.ref);
|
344
315
|
|
345
316
|
const blockAsset = await assetManager.getAsset(blockRef, true);
|
346
|
-
const instanceConfig = await configManager.getConfigForSection(planRef, instanceId);
|
347
|
-
|
348
317
|
if (!blockAsset) {
|
349
318
|
throw new Error('Block not found: ' + blockRef);
|
350
319
|
}
|
351
320
|
|
352
|
-
|
353
|
-
|
321
|
+
const existingInstance = this.getInstance(systemId, instanceId);
|
322
|
+
|
323
|
+
if (existingInstance) {
|
324
|
+
if (existingInstance.status === InstanceStatus.READY) {
|
325
|
+
// Instance is already running
|
326
|
+
return existingInstance;
|
327
|
+
}
|
328
|
+
|
329
|
+
if (
|
330
|
+
existingInstance.desiredStatus === DesiredInstanceStatus.RUN &&
|
331
|
+
existingInstance.status === InstanceStatus.STARTING
|
332
|
+
) {
|
333
|
+
// Internal instance is already starting - don't start it again
|
334
|
+
return existingInstance;
|
335
|
+
}
|
336
|
+
|
337
|
+
if (
|
338
|
+
existingInstance.owner === InstanceOwner.EXTERNAL &&
|
339
|
+
existingInstance.status === InstanceStatus.STARTING
|
340
|
+
) {
|
341
|
+
// External instance is already starting - don't start it again
|
342
|
+
return existingInstance;
|
343
|
+
}
|
354
344
|
}
|
355
345
|
|
356
|
-
|
357
|
-
|
346
|
+
let instance: InstanceInfo = {
|
347
|
+
systemId,
|
348
|
+
instanceId,
|
349
|
+
ref: blockRef,
|
350
|
+
name: blockAsset.data.metadata.name,
|
351
|
+
desiredStatus: DesiredInstanceStatus.RUN,
|
352
|
+
owner: InstanceOwner.INTERNAL,
|
353
|
+
type: InstanceType.UNKNOWN,
|
354
|
+
status: InstanceStatus.STARTING,
|
355
|
+
startedAt: Date.now(),
|
356
|
+
};
|
357
|
+
|
358
|
+
console.log('Starting instance: %s::%s [desired: %s]', systemId, instanceId, instance.desiredStatus);
|
359
|
+
// Save the instance before starting it, so that we can track the status
|
360
|
+
await this.saveInternalInstance(instance);
|
361
|
+
|
362
|
+
if (existingInstance) {
|
363
|
+
// Check if the instance is already running - but after we've communicated the desired status
|
364
|
+
const currentStatus = await this.requestInstanceStatus(existingInstance);
|
365
|
+
if (currentStatus === InstanceStatus.READY) {
|
366
|
+
// Instance is already running
|
367
|
+
return existingInstance;
|
368
|
+
}
|
369
|
+
}
|
358
370
|
|
359
|
-
const
|
371
|
+
const instanceConfig = await configManager.getConfigForSection(systemId, instanceId);
|
372
|
+
const runner = new BlockInstanceRunner(systemId);
|
360
373
|
|
361
374
|
const startTime = Date.now();
|
362
375
|
try {
|
363
|
-
const
|
376
|
+
const processInfo = await runner.start(blockRef, instanceId, instanceConfig);
|
364
377
|
//emit stdout/stderr via sockets
|
365
|
-
|
378
|
+
processInfo.output.on('data', (data: Buffer) => {
|
366
379
|
const payload = {
|
367
380
|
source: 'stdout',
|
368
381
|
level: 'INFO',
|
369
382
|
message: data.toString(),
|
370
383
|
time: Date.now(),
|
371
384
|
};
|
372
|
-
this.
|
385
|
+
this.emitInstanceEvent(systemId, instanceId, EVENT_INSTANCE_LOG, payload);
|
373
386
|
});
|
374
387
|
|
375
|
-
|
388
|
+
processInfo.output.on('exit', (exitCode: number) => {
|
376
389
|
const timeRunning = Date.now() - startTime;
|
377
|
-
const instance = this.getInstance(
|
378
|
-
if (instance?.status ===
|
390
|
+
const instance = this.getInstance(systemId, instanceId);
|
391
|
+
if (instance?.status === InstanceStatus.READY) {
|
379
392
|
//It's already been running
|
380
393
|
return;
|
381
394
|
}
|
@@ -387,7 +400,13 @@ class InstanceManager {
|
|
387
400
|
}
|
388
401
|
|
389
402
|
if (exitCode !== 0 || timeRunning < MIN_TIME_RUNNING) {
|
390
|
-
this.
|
403
|
+
const instance = this.getInstance(systemId, instanceId);
|
404
|
+
if (instance) {
|
405
|
+
instance.status = InstanceStatus.FAILED;
|
406
|
+
this.save();
|
407
|
+
}
|
408
|
+
|
409
|
+
this.emitSystemEvent(systemId, EVENT_INSTANCE_EXITED, {
|
391
410
|
error: 'Failed to start instance',
|
392
411
|
status: EVENT_INSTANCE_EXITED,
|
393
412
|
instanceId: blockInstance.id,
|
@@ -395,15 +414,20 @@ class InstanceManager {
|
|
395
414
|
}
|
396
415
|
});
|
397
416
|
|
398
|
-
|
399
|
-
|
400
|
-
|
417
|
+
instance.status = InstanceStatus.READY;
|
418
|
+
|
419
|
+
return this.saveInternalInstance({
|
420
|
+
...instance,
|
421
|
+
type: processInfo.type,
|
422
|
+
pid: processInfo.pid ?? -1,
|
401
423
|
health: null,
|
402
|
-
portType:
|
403
|
-
status:
|
424
|
+
portType: processInfo.portType,
|
425
|
+
status: InstanceStatus.READY,
|
426
|
+
internal: {
|
427
|
+
logs: processInfo.logs,
|
428
|
+
output: processInfo.output,
|
429
|
+
},
|
404
430
|
});
|
405
|
-
|
406
|
-
return (this._processes[planRef][instanceId] = process);
|
407
431
|
} catch (e: any) {
|
408
432
|
console.warn('Failed to start instance', e);
|
409
433
|
const logs: LogEntry[] = [
|
@@ -415,83 +439,288 @@ class InstanceManager {
|
|
415
439
|
},
|
416
440
|
];
|
417
441
|
|
418
|
-
await this.
|
419
|
-
|
442
|
+
const out = await this.saveInternalInstance({
|
443
|
+
...instance,
|
444
|
+
type: InstanceType.LOCAL,
|
420
445
|
pid: null,
|
421
446
|
health: null,
|
422
447
|
portType: DEFAULT_HEALTH_PORT_TYPE,
|
423
|
-
status:
|
448
|
+
status: InstanceStatus.FAILED,
|
424
449
|
});
|
425
450
|
|
426
|
-
this.
|
451
|
+
this.emitInstanceEvent(systemId, instanceId, EVENT_INSTANCE_LOG, logs[0]);
|
427
452
|
|
428
|
-
this.
|
453
|
+
this.emitInstanceEvent(systemId, blockInstance.id, EVENT_INSTANCE_EXITED, {
|
429
454
|
error: `Failed to start instance: ${e.message}`,
|
430
455
|
status: EVENT_INSTANCE_EXITED,
|
431
456
|
instanceId: blockInstance.id,
|
432
457
|
});
|
433
458
|
|
434
|
-
return
|
435
|
-
pid: -1,
|
436
|
-
type,
|
437
|
-
logs: () => logs,
|
438
|
-
stop: () => Promise.resolve(),
|
439
|
-
ref: blockRef,
|
440
|
-
id: instanceId,
|
441
|
-
name: blockInstance.name,
|
442
|
-
output: new EventEmitter(),
|
443
|
-
});
|
459
|
+
return out;
|
444
460
|
}
|
445
461
|
}
|
446
462
|
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
* @param {string} instanceId
|
451
|
-
* @return {ProcessInfo|null}
|
452
|
-
*/
|
453
|
-
getProcessForInstance(planRef: string, instanceId: string) {
|
454
|
-
if (!this._processes[planRef]) {
|
455
|
-
return null;
|
456
|
-
}
|
463
|
+
public async restart(systemId: string, instanceId: string) {
|
464
|
+
systemId = normalizeKapetaUri(systemId);
|
465
|
+
await this.stop(systemId, instanceId);
|
457
466
|
|
458
|
-
return this.
|
467
|
+
return this.start(systemId, instanceId);
|
459
468
|
}
|
460
469
|
|
461
|
-
async
|
462
|
-
|
463
|
-
|
470
|
+
public async stopAll() {
|
471
|
+
return this.stopInstances(this._instances);
|
472
|
+
}
|
473
|
+
|
474
|
+
private async stopInstances(instances: InstanceInfo[]) {
|
475
|
+
const promises = instances.map((instance) => this.stop(instance.systemId, instance.instanceId));
|
476
|
+
await Promise.allSettled(promises);
|
477
|
+
this.save();
|
478
|
+
}
|
479
|
+
|
480
|
+
private save() {
|
481
|
+
try {
|
482
|
+
storageService.put(
|
483
|
+
'instances',
|
484
|
+
this._instances.map((instance) => {
|
485
|
+
const copy = { ...instance };
|
486
|
+
delete copy.internal;
|
487
|
+
return copy;
|
488
|
+
})
|
489
|
+
);
|
490
|
+
} catch (e) {
|
491
|
+
console.error('Failed to save instances', this._instances, e);
|
492
|
+
}
|
493
|
+
}
|
494
|
+
|
495
|
+
private async checkInstances() {
|
496
|
+
//console.log('\n## Checking instances:');
|
497
|
+
let changed = false;
|
498
|
+
const all = [...this._instances];
|
499
|
+
while (all.length > 0) {
|
500
|
+
// Check a few instances at a time - docker doesn't like too many concurrent requests
|
501
|
+
const chunk = all.splice(0, 20);
|
502
|
+
const promises = chunk.map(async (instance) => {
|
503
|
+
if (!instance.systemId) {
|
504
|
+
return;
|
505
|
+
}
|
506
|
+
|
507
|
+
instance.systemId = normalizeKapetaUri(instance.systemId);
|
508
|
+
if (instance.ref) {
|
509
|
+
instance.ref = normalizeKapetaUri(instance.ref);
|
510
|
+
}
|
511
|
+
|
512
|
+
const newStatus = await this.requestInstanceStatus(instance);
|
513
|
+
/*
|
514
|
+
console.log('Check instance %s %s: [current: %s, new: %s, desired: %s]',
|
515
|
+
instance.systemId, instance.instanceId, instance.status, newStatus, instance.desiredStatus);
|
516
|
+
*/
|
517
|
+
|
518
|
+
if (newStatus === InstanceStatus.BUSY) {
|
519
|
+
// If instance is busy we skip it
|
520
|
+
//console.log('Instance %s %s is busy', instance.systemId, instance.instanceId);
|
521
|
+
return;
|
522
|
+
}
|
523
|
+
|
524
|
+
if (
|
525
|
+
instance.startedAt !== undefined &&
|
526
|
+
newStatus === InstanceStatus.UNHEALTHY &&
|
527
|
+
instance.startedAt + HEALTH_CHECK_TIMEOUT < Date.now() &&
|
528
|
+
instance.status === InstanceStatus.STARTING
|
529
|
+
) {
|
530
|
+
// If instance is starting we consider unhealthy an indication
|
531
|
+
// that it is still starting
|
532
|
+
//console.log('Instance %s %s is still starting', instance.systemId, instance.instanceId);
|
533
|
+
return;
|
534
|
+
}
|
535
|
+
|
536
|
+
if (instance.status !== newStatus) {
|
537
|
+
const oldStatus = instance.status;
|
538
|
+
const skipUpdate =
|
539
|
+
(newStatus === InstanceStatus.STOPPED && instance.status === InstanceStatus.FAILED) ||
|
540
|
+
([InstanceStatus.READY, InstanceStatus.UNHEALTHY].includes(newStatus) &&
|
541
|
+
instance.status === InstanceStatus.STOPPING &&
|
542
|
+
instance.desiredStatus === DesiredInstanceStatus.STOP) ||
|
543
|
+
(newStatus === InstanceStatus.STOPPED &&
|
544
|
+
instance.status === InstanceStatus.STARTING &&
|
545
|
+
instance.desiredStatus === DesiredInstanceStatus.RUN);
|
546
|
+
|
547
|
+
if (!skipUpdate) {
|
548
|
+
const oldStatus = instance.status;
|
549
|
+
instance.status = newStatus;
|
550
|
+
console.log(
|
551
|
+
'Instance status changed: %s %s: %s -> %s',
|
552
|
+
instance.systemId,
|
553
|
+
instance.instanceId,
|
554
|
+
oldStatus,
|
555
|
+
instance.status
|
556
|
+
);
|
557
|
+
this.emitSystemEvent(instance.systemId, EVENT_STATUS_CHANGED, instance);
|
558
|
+
changed = true;
|
559
|
+
}
|
560
|
+
}
|
561
|
+
|
562
|
+
if (instance.desiredStatus === DesiredInstanceStatus.RUN && newStatus === InstanceStatus.STOPPED) {
|
563
|
+
//If the instance is stopped but we want it to run, start it
|
564
|
+
try {
|
565
|
+
await this.start(instance.systemId, instance.instanceId);
|
566
|
+
} catch (e: any) {
|
567
|
+
console.warn('Failed to start instance', instance.systemId, instance.instanceId, e);
|
568
|
+
}
|
569
|
+
return;
|
570
|
+
}
|
571
|
+
|
572
|
+
if (instance.desiredStatus === DesiredInstanceStatus.STOP && newStatus === InstanceStatus.READY) {
|
573
|
+
//If the instance is running but we want it to stop, stop it
|
574
|
+
try {
|
575
|
+
await this.stop(instance.systemId, instance.instanceId);
|
576
|
+
} catch (e) {
|
577
|
+
console.warn('Failed to stop instance', instance.systemId, instance.instanceId, e);
|
578
|
+
}
|
579
|
+
return;
|
580
|
+
}
|
581
|
+
|
582
|
+
if (
|
583
|
+
instance.desiredStatus === DesiredInstanceStatus.RUN &&
|
584
|
+
instance.status !== newStatus &&
|
585
|
+
newStatus === InstanceStatus.UNHEALTHY
|
586
|
+
) {
|
587
|
+
//If the instance is unhealthy, try to restart it
|
588
|
+
console.log('Restarting unhealthy instance', instance);
|
589
|
+
try {
|
590
|
+
await this.restart(instance.systemId, instance.instanceId);
|
591
|
+
} catch (e) {
|
592
|
+
console.warn('Failed to restart instance', instance.systemId, instance.instanceId, e);
|
593
|
+
}
|
594
|
+
}
|
595
|
+
});
|
596
|
+
|
597
|
+
await Promise.allSettled(promises);
|
598
|
+
}
|
599
|
+
|
600
|
+
if (changed) {
|
601
|
+
this.save();
|
464
602
|
}
|
465
603
|
|
466
|
-
//
|
467
|
-
return this.createProcess(planRef, instanceId);
|
604
|
+
//console.log('\n##\n');
|
468
605
|
}
|
469
606
|
|
470
|
-
async
|
471
|
-
if (
|
472
|
-
|
607
|
+
/**
 * Resolves the status of an instance as observed outside this manager.
 *
 * Docker instances are mapped from the state reported by their container;
 * every other instance is treated as a plain OS process identified by
 * `instance.pid`.
 *
 * @param instance the instance to inspect
 * @returns the status derived from the container state or process liveness
 */
private async getExternalStatus(instance: InstanceInfo): Promise<InstanceStatus> {
    if (instance.type !== InstanceType.DOCKER) {
        // Without a pid there is nothing to probe.
        if (!instance.pid) {
            return InstanceStatus.STOPPED;
        }

        //Otherwise its just a normal process.
        //TODO: Handle for Windows
        let alive: boolean;
        try {
            // Signal 0 sends nothing; it only checks whether the pid can be signalled.
            alive = process.kill(instance.pid as number, 0);
        } catch (err: any) {
            // EPERM is still reported as READY: the signal was refused, not the pid missing.
            alive = err.code === 'EPERM';
        }

        return alive ? InstanceStatus.READY : InstanceStatus.STOPPED;
    }

    const container = await containerManager.getContainerByName(
        getBlockInstanceContainerName(instance.instanceId)
    );
    if (!container) {
        // If the container doesn't exist, we consider the instance stopped
        return InstanceStatus.STOPPED;
    }

    const state = await container.status();

    switch (state.Status) {
        case 'running': {
            const health = state.Health?.Status;
            if (health === 'starting') {
                return InstanceStatus.STARTING;
            }
            if (health === 'unhealthy') {
                return InstanceStatus.UNHEALTHY;
            }
            // 'healthy', or no health information at all, counts as ready.
            return InstanceStatus.READY;
        }
        case 'created':
            return InstanceStatus.STARTING;
        case 'removing':
        case 'restarting':
        case 'paused':
            return InstanceStatus.BUSY;
        default:
            // 'exited', 'dead' and any unrecognised state are reported as stopped.
            return InstanceStatus.STOPPED;
    }
}
|
484
671
|
|
485
|
-
async
|
486
|
-
|
487
|
-
|
488
|
-
|
672
|
+
/**
 * Determines the current status of an instance.
 *
 * Docker instances, and instances whose external status is STOPPED, return
 * the external status as-is. For the remaining (process) instances the
 * health URL - when one is configured - is requested: a transport error or
 * a status code above 399 yields UNHEALTHY, anything else READY.
 *
 * The health request is bounded by a timeout so an endpoint that accepts the
 * connection but never answers cannot leave the returned promise pending
 * forever; a timeout surfaces as a request error and is reported as UNHEALTHY.
 *
 * @param instance the instance to check
 * @returns the resolved instance status; the promise never rejects because of the health request
 */
private async requestInstanceStatus(instance: InstanceInfo): Promise<InstanceStatus> {
    const externalStatus = await this.getExternalStatus(instance);
    if (instance.type === InstanceType.DOCKER) {
        // For docker instances we can rely on docker status
        return externalStatus;
    }

    if (externalStatus === InstanceStatus.STOPPED) {
        return externalStatus;
    }

    // Capture the URL in a local so the narrowing survives inside the callback below.
    const healthUrl = instance.health;
    if (!healthUrl) {
        //No health url means we assume it's healthy as soon as it's running
        return InstanceStatus.READY;
    }

    // Upper bound (ms) for a single health probe.
    const healthTimeoutMs = 10000;

    return new Promise((resolve) => {
        request(healthUrl, { timeout: healthTimeoutMs }, (err, response) => {
            if (err || !response || response.statusCode > 399) {
                resolve(InstanceStatus.UNHEALTHY);
                return;
            }

            resolve(InstanceStatus.READY);
        });
    });
}
|
708
|
+
|
709
|
+
/**
 * Emits an event for a system through `socketManager`, targeting
 * `<normalized systemId>/instances`.
 *
 * Errors thrown by the emit call are logged as a warning and not propagated.
 *
 * @param systemId system identifier; normalized with `normalizeKapetaUri` before use
 * @param type event type passed on to `socketManager.emit`
 * @param payload event payload passed on to `socketManager.emit`
 */
private emitSystemEvent(systemId: string, type: string, payload: any) {
    const normalizedSystemId = normalizeKapetaUri(systemId);
    try {
        const target = `${normalizedSystemId}/instances`;
        socketManager.emit(target, type, payload);
    } catch (error: any) {
        console.warn('Failed to emit instance event: %s', error.message);
    }
}
|
492
717
|
|
493
|
-
|
494
|
-
|
718
|
+
/**
 * Emits an event for a single instance through `socketManager`, targeting
 * `<normalized systemId>/instances/<instanceId>`.
 *
 * Errors thrown by the emit call are logged as a warning and not propagated.
 *
 * @param systemId system identifier; normalized with `normalizeKapetaUri` before use
 * @param instanceId identifier of the instance the event concerns
 * @param type event type passed on to `socketManager.emit`
 * @param payload event payload passed on to `socketManager.emit`
 */
private emitInstanceEvent(systemId: string, instanceId: string, type: string, payload: any) {
    const normalizedSystemId = normalizeKapetaUri(systemId);
    try {
        const target = `${normalizedSystemId}/instances/${instanceId}`;
        socketManager.emit(target, type, payload);
    } catch (error: any) {
        console.warn('Failed to emit instance event: %s', error.message);
    }
}
|
497
726
|
}
|
@@ -499,5 +728,5 @@ class InstanceManager {
|
|
499
728
|
export const instanceManager = new InstanceManager();

/**
 * Asks the instance manager to stop all instances when the process exits.
 *
 * NOTE(review): Node.js documents that 'exit' listeners must be synchronous -
 * work still queued on the event loop is abandoned once they return - so the
 * awaited stopAll() may not run to completion. Confirm whether shutdown is
 * also handled elsewhere (e.g. signal handlers).
 */
const stopAllInstancesOnExit = async (): Promise<void> => {
    await instanceManager.stopAll();
};

process.on('exit', stopAllInstancesOnExit);
|