@kapeta/local-cluster-service 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/CHANGELOG.md +7 -0
  2. package/dist/cjs/src/assetManager.js +7 -4
  3. package/dist/cjs/src/clusterService.js +2 -0
  4. package/dist/cjs/src/codeGeneratorManager.js +3 -3
  5. package/dist/cjs/src/config/routes.js +1 -1
  6. package/dist/cjs/src/configManager.js +13 -1
  7. package/dist/cjs/src/containerManager.d.ts +22 -2
  8. package/dist/cjs/src/containerManager.js +42 -15
  9. package/dist/cjs/src/definitionsManager.d.ts +11 -0
  10. package/dist/cjs/src/definitionsManager.js +44 -0
  11. package/dist/cjs/src/filesystemManager.js +0 -2
  12. package/dist/cjs/src/instanceManager.d.ts +23 -47
  13. package/dist/cjs/src/instanceManager.js +416 -235
  14. package/dist/cjs/src/instances/routes.js +23 -14
  15. package/dist/cjs/src/middleware/kapeta.js +7 -0
  16. package/dist/cjs/src/networkManager.js +6 -0
  17. package/dist/cjs/src/operatorManager.js +8 -4
  18. package/dist/cjs/src/providerManager.js +3 -3
  19. package/dist/cjs/src/repositoryManager.js +7 -3
  20. package/dist/cjs/src/serviceManager.js +5 -0
  21. package/dist/cjs/src/types.d.ts +39 -13
  22. package/dist/cjs/src/types.js +28 -0
  23. package/dist/cjs/src/utils/BlockInstanceRunner.d.ts +3 -3
  24. package/dist/cjs/src/utils/BlockInstanceRunner.js +27 -26
  25. package/dist/cjs/src/utils/utils.d.ts +2 -0
  26. package/dist/cjs/src/utils/utils.js +17 -1
  27. package/dist/esm/src/assetManager.js +7 -4
  28. package/dist/esm/src/clusterService.js +2 -0
  29. package/dist/esm/src/codeGeneratorManager.js +3 -3
  30. package/dist/esm/src/config/routes.js +1 -1
  31. package/dist/esm/src/configManager.js +13 -1
  32. package/dist/esm/src/containerManager.d.ts +22 -2
  33. package/dist/esm/src/containerManager.js +41 -14
  34. package/dist/esm/src/definitionsManager.d.ts +11 -0
  35. package/dist/esm/src/definitionsManager.js +38 -0
  36. package/dist/esm/src/filesystemManager.js +0 -2
  37. package/dist/esm/src/instanceManager.d.ts +23 -47
  38. package/dist/esm/src/instanceManager.js +416 -236
  39. package/dist/esm/src/instances/routes.js +23 -14
  40. package/dist/esm/src/middleware/kapeta.js +7 -0
  41. package/dist/esm/src/networkManager.js +6 -0
  42. package/dist/esm/src/operatorManager.js +8 -4
  43. package/dist/esm/src/providerManager.js +3 -3
  44. package/dist/esm/src/repositoryManager.js +7 -3
  45. package/dist/esm/src/serviceManager.js +5 -0
  46. package/dist/esm/src/types.d.ts +39 -13
  47. package/dist/esm/src/types.js +27 -1
  48. package/dist/esm/src/utils/BlockInstanceRunner.d.ts +3 -3
  49. package/dist/esm/src/utils/BlockInstanceRunner.js +28 -27
  50. package/dist/esm/src/utils/utils.d.ts +2 -0
  51. package/dist/esm/src/utils/utils.js +14 -0
  52. package/package.json +2 -1
  53. package/src/assetManager.ts +7 -4
  54. package/src/clusterService.ts +3 -0
  55. package/src/codeGeneratorManager.ts +3 -2
  56. package/src/config/routes.ts +1 -1
  57. package/src/configManager.ts +13 -1
  58. package/src/containerManager.ts +62 -15
  59. package/src/definitionsManager.ts +54 -0
  60. package/src/filesystemManager.ts +0 -2
  61. package/src/instanceManager.ts +495 -266
  62. package/src/instances/routes.ts +23 -17
  63. package/src/middleware/kapeta.ts +10 -0
  64. package/src/networkManager.ts +6 -0
  65. package/src/operatorManager.ts +11 -6
  66. package/src/providerManager.ts +3 -2
  67. package/src/repositoryManager.ts +7 -3
  68. package/src/serviceManager.ts +6 -0
  69. package/src/types.ts +44 -14
  70. package/src/utils/BlockInstanceRunner.ts +32 -30
  71. package/src/utils/utils.ts +18 -0
@@ -1,204 +1,153 @@
1
1
  import _ from 'lodash';
2
2
  import request from 'request';
3
- import EventEmitter from 'events';
4
3
  import { BlockInstanceRunner } from './utils/BlockInstanceRunner';
5
4
  import { storageService } from './storageService';
6
5
  import { socketManager } from './socketManager';
7
6
  import { serviceManager } from './serviceManager';
8
7
  import { assetManager } from './assetManager';
9
- import { containerManager } from './containerManager';
8
+ import { containerManager, HEALTH_CHECK_TIMEOUT } from './containerManager';
10
9
  import { configManager } from './configManager';
11
- import { InstanceInfo, LogEntry, ProcessInfo } from './types';
10
+ import { DesiredInstanceStatus, InstanceInfo, InstanceOwner, InstanceStatus, InstanceType, LogEntry } from './types';
12
11
  import { BlockInstance } from '@kapeta/schemas';
12
+ import { getBlockInstanceContainerName, normalizeKapetaUri } from './utils/utils';
13
13
 
14
- const CHECK_INTERVAL = 10000;
14
+ const CHECK_INTERVAL = 5000;
15
15
  const DEFAULT_HEALTH_PORT_TYPE = 'rest';
16
16
 
17
17
  const EVENT_STATUS_CHANGED = 'status-changed';
18
18
  const EVENT_INSTANCE_CREATED = 'instance-created';
19
19
  const EVENT_INSTANCE_EXITED = 'instance-exited';
20
20
  const EVENT_INSTANCE_LOG = 'instance-log';
21
-
22
- const STATUS_STARTING = 'starting';
23
- const STATUS_READY = 'ready';
24
- const STATUS_UNHEALTHY = 'unhealthy';
25
- const STATUS_STOPPED = 'stopped';
26
-
27
21
  const MIN_TIME_RUNNING = 30000; //If something didnt run for more than 30 secs - it failed
28
22
 
29
- class InstanceManager {
30
- private _interval: NodeJS.Timer;
31
-
32
- /**
33
- * Contains an array of running instances that have self-registered with this
34
- * cluster service. This is done by the Kapeta SDKs
35
- */
36
- private _instances: InstanceInfo[] = [];
23
+ export class InstanceManager {
24
+ private _interval: NodeJS.Timer | undefined = undefined;
37
25
 
38
- /**
39
- * Contains the process info for the instances started by this manager. In memory only
40
- * so can't be relied on for knowing everything that's running.
41
- *
42
- */
43
- private _processes: { [systemId: string]: { [instanceId: string]: ProcessInfo } } = {};
26
+ private readonly _instances: InstanceInfo[] = [];
44
27
 
45
28
  constructor() {
46
- this._interval = setInterval(() => this._checkInstances(), CHECK_INTERVAL);
47
29
  this._instances = storageService.section('instances', []);
48
- this._processes = {};
49
-
50
- this._checkInstances();
51
- }
52
30
 
53
- _save() {
54
- storageService.put('instances', this._instances);
31
+ // We need to wait a bit before running the first check
32
+ this.checkInstancesLater(1000);
55
33
  }
56
34
 
57
- async _checkInstances() {
58
- let changed = false;
59
- for (let i = 0; i < this._instances.length; i++) {
60
- const instance = this._instances[i];
61
-
62
- const newStatus = await this._getInstanceStatus(instance);
35
+ private checkInstancesLater(time = CHECK_INTERVAL) {
36
+ if (this._interval) {
37
+ clearTimeout(this._interval);
38
+ }
63
39
 
64
- if (newStatus === STATUS_UNHEALTHY && instance.status === STATUS_STARTING) {
65
- // If instance is starting we consider unhealthy an indication
66
- // that it is still starting
67
- continue;
68
- }
40
+ this._interval = setTimeout(async () => {
41
+ await this.checkInstances();
42
+ this.checkInstancesLater();
43
+ }, time);
44
+ }
69
45
 
70
- if (instance.status !== newStatus) {
71
- instance.status = newStatus;
72
- console.log(
73
- 'Instance status changed: %s %s -> %s',
74
- instance.systemId,
75
- instance.instanceId,
76
- instance.status
77
- );
78
- this._emit(instance.systemId, EVENT_STATUS_CHANGED, instance);
79
- changed = true;
80
- }
46
+ public getInstances() {
47
+ if (!this._instances) {
48
+ return [];
81
49
  }
82
50
 
83
- if (changed) {
84
- this._save();
85
- }
51
+ return [...this._instances];
86
52
  }
87
53
 
88
- async _isRunning(instance: InstanceInfo) {
89
- if (!instance.pid) {
90
- return;
54
+ public getInstancesForPlan(systemId: string) {
55
+ if (!this._instances) {
56
+ return [];
91
57
  }
92
58
 
93
- if (instance.type === 'docker') {
94
- const container = await containerManager.get(instance.pid as string);
95
- if (!container) {
96
- console.warn('Container not found: %s', instance.pid);
97
- return false;
98
- }
99
- return await container.isRunning();
100
- }
59
+ systemId = normalizeKapetaUri(systemId);
101
60
 
102
- //Otherwise its just a normal process.
103
- //TODO: Handle for Windows
104
- try {
105
- return process.kill(instance.pid as number, 0);
106
- } catch (err: any) {
107
- return err.code === 'EPERM';
108
- }
61
+ return this._instances.filter((instance) => instance.systemId === systemId);
109
62
  }
110
63
 
111
- async _getInstanceStatus(instance: InstanceInfo): Promise<string> {
112
- if (instance.status === STATUS_STOPPED) {
113
- //Will only change when it reregisters
114
- return STATUS_STOPPED;
115
- }
64
+ public getInstance(systemId: string, instanceId: string) {
65
+ systemId = normalizeKapetaUri(systemId);
116
66
 
117
- if (!(await this._isRunning(instance))) {
118
- return STATUS_STOPPED;
119
- }
67
+ return this._instances.find((i) => i.systemId === systemId && i.instanceId === instanceId);
68
+ }
120
69
 
121
- if (!instance.health) {
122
- //No health url means we assume it's healthy as soon as it's running
123
- return STATUS_READY;
70
+ public async saveInternalInstance(instance: InstanceInfo) {
71
+ instance.systemId = normalizeKapetaUri(instance.systemId);
72
+ if (instance.ref) {
73
+ instance.ref = normalizeKapetaUri(instance.ref);
124
74
  }
125
75
 
126
- return new Promise((resolve) => {
127
- if (!instance.health) {
128
- resolve(STATUS_READY);
129
- return;
130
- }
131
- request(instance.health, (err, response) => {
132
- if (err) {
133
- resolve(STATUS_UNHEALTHY);
134
- return;
135
- }
136
-
137
- if (response.statusCode > 399) {
138
- resolve(STATUS_UNHEALTHY);
139
- return;
140
- }
76
+ //Get target address
77
+ let address = await serviceManager.getProviderAddress(
78
+ instance.systemId,
79
+ instance.instanceId,
80
+ instance.portType ?? DEFAULT_HEALTH_PORT_TYPE
81
+ );
141
82
 
142
- resolve(STATUS_READY);
143
- });
144
- });
145
- }
83
+ const healthUrl = this.getHealthUrl(instance, address);
146
84
 
147
- getInstances() {
148
- if (!this._instances) {
149
- return [];
85
+ instance.address = address;
86
+ if (healthUrl) {
87
+ instance.health = healthUrl;
150
88
  }
151
89
 
152
- return [...this._instances];
153
- }
154
-
155
- getInstancesForPlan(systemId: string) {
156
- if (!this._instances) {
157
- return [];
90
+ let existingInstance = this.getInstance(instance.systemId, instance.instanceId);
91
+ if (existingInstance) {
92
+ const ix = this._instances.indexOf(existingInstance);
93
+ this._instances.splice(ix, 1, instance);
94
+ this.emitSystemEvent(instance.systemId, EVENT_STATUS_CHANGED, instance);
95
+ } else {
96
+ this._instances.push(instance);
97
+ this.emitSystemEvent(instance.systemId, EVENT_INSTANCE_CREATED, instance);
158
98
  }
159
99
 
160
- return this._instances.filter((instance) => instance.systemId === systemId);
161
- }
100
+ this.save();
162
101
 
163
- /**
164
- * Get instance information
165
- *
166
- * @param {string} systemId
167
- * @param {string} instanceId
168
- * @return {*}
169
- */
170
- getInstance(systemId: string, instanceId: string) {
171
- return _.find(this._instances, { systemId, instanceId });
102
+ return instance;
172
103
  }
173
104
 
174
105
  /**
175
- *
176
- * @param {string} systemId
177
- * @param {string} instanceId
178
- * @param {InstanceInfo} info
179
- * @return {Promise<void>}
106
+ * Method is called when instance is started from the Kapeta SDKs (e.g. NodeJS SDK)
107
+ * which self-registers with the cluster service locally on startup.
180
108
  */
181
- async registerInstance(systemId: string, instanceId: string, info: Omit<InstanceInfo, 'systemId' | 'instanceId'>) {
109
+ public async registerInstanceFromSDK(
110
+ systemId: string,
111
+ instanceId: string,
112
+ info: Omit<InstanceInfo, 'systemId' | 'instanceId'>
113
+ ) {
114
+ systemId = normalizeKapetaUri(systemId);
115
+
182
116
  let instance = this.getInstance(systemId, instanceId);
183
117
 
184
118
  //Get target address
185
- let address = await serviceManager.getProviderAddress(
119
+ const address = await serviceManager.getProviderAddress(
186
120
  systemId,
187
121
  instanceId,
188
122
  info.portType ?? DEFAULT_HEALTH_PORT_TYPE
189
123
  );
190
124
 
191
- let healthUrl = null;
192
- let health = info.health;
193
- if (health) {
194
- if (health.startsWith('/')) {
195
- health = health.substring(1);
196
- }
197
- healthUrl = address + health;
198
- }
125
+ const healthUrl = this.getHealthUrl(info, address);
199
126
 
200
127
  if (instance) {
201
- instance.status = STATUS_STARTING;
128
+ if (instance.status === InstanceStatus.STOPPING && instance.desiredStatus === DesiredInstanceStatus.STOP) {
129
+ //If instance is stopping do not interfere
130
+ return;
131
+ }
132
+
133
+ if (info.owner === InstanceOwner.EXTERNAL) {
134
+ //If instance was started externally - then we want to replace the internal instance with that
135
+ if (
136
+ instance.owner === InstanceOwner.INTERNAL &&
137
+ (instance.status === InstanceStatus.READY ||
138
+ instance.status === InstanceStatus.STARTING ||
139
+ instance.status === InstanceStatus.UNHEALTHY)
140
+ ) {
141
+ throw new Error(`Instance ${instanceId} is already running`);
142
+ }
143
+
144
+ instance.desiredStatus = info.desiredStatus;
145
+ instance.owner = info.owner;
146
+ instance.internal = undefined;
147
+ instance.status = InstanceStatus.STARTING;
148
+ instance.startedAt = Date.now();
149
+ }
150
+
202
151
  instance.pid = info.pid;
203
152
  instance.address = address;
204
153
  if (info.type) {
@@ -207,63 +156,73 @@ class InstanceManager {
207
156
  if (healthUrl) {
208
157
  instance.health = healthUrl;
209
158
  }
210
- this._emit(systemId, EVENT_STATUS_CHANGED, instance);
159
+
160
+ this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
211
161
  } else {
162
+ //If instance was not found - then we're receiving an externally started instance
212
163
  instance = {
164
+ ...info,
213
165
  systemId,
214
166
  instanceId,
215
- status: STATUS_STARTING,
216
- pid: info.pid,
217
- type: info.type,
167
+ status: InstanceStatus.STARTING,
168
+ startedAt: Date.now(),
169
+ desiredStatus: DesiredInstanceStatus.EXTERNAL,
170
+ owner: InstanceOwner.EXTERNAL,
218
171
  health: healthUrl,
219
172
  address,
220
173
  };
221
174
 
222
175
  this._instances.push(instance);
223
176
 
224
- this._emit(systemId, EVENT_INSTANCE_CREATED, instance);
177
+ this.emitSystemEvent(systemId, EVENT_INSTANCE_CREATED, instance);
225
178
  }
226
179
 
227
- this._save();
180
+ this.save();
181
+
182
+ return instance;
228
183
  }
229
184
 
230
- setInstanceAsStopped(systemId: string, instanceId: string) {
231
- const instance = _.find(this._instances, { systemId, instanceId });
232
- if (instance) {
233
- instance.status = STATUS_STOPPED;
234
- instance.pid = null;
235
- instance.health = null;
236
- this._emit(systemId, EVENT_STATUS_CHANGED, instance);
237
- this._save();
185
+ private getHealthUrl(info: Omit<InstanceInfo, 'systemId' | 'instanceId'>, address: string) {
186
+ let healthUrl = null;
187
+ let health = info.health;
188
+ if (health) {
189
+ if (health.startsWith('/')) {
190
+ health = health.substring(1);
191
+ }
192
+ healthUrl = address + health;
238
193
  }
194
+ return healthUrl;
239
195
  }
240
196
 
241
- _emit(systemId: string, type: string, payload: any) {
242
- try {
243
- socketManager.emit(`${systemId}/instances`, type, payload);
244
- } catch (e: any) {
245
- console.warn('Failed to emit instance event: %s', e.message);
197
+ public markAsStopped(systemId: string, instanceId: string) {
198
+ systemId = normalizeKapetaUri(systemId);
199
+ const instance = _.find(this._instances, { systemId, instanceId });
200
+ if (instance && instance.owner === InstanceOwner.EXTERNAL && instance.status !== InstanceStatus.STOPPED) {
201
+ instance.status = InstanceStatus.STOPPED;
202
+ instance.pid = null;
203
+ instance.health = null;
204
+ this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
205
+ this.save();
246
206
  }
247
207
  }
248
208
 
249
- async createProcessesForPlan(planRef: string): Promise<ProcessInfo[]> {
250
- await this.stopAllForPlan(planRef);
251
-
252
- const plan = await assetManager.getPlan(planRef, true);
209
+ public async startAllForPlan(systemId: string): Promise<InstanceInfo[]> {
210
+ systemId = normalizeKapetaUri(systemId);
211
+ const plan = await assetManager.getPlan(systemId, true);
253
212
  if (!plan) {
254
- throw new Error('Plan not found: ' + planRef);
213
+ throw new Error('Plan not found: ' + systemId);
255
214
  }
256
215
 
257
216
  if (!plan.spec.blocks) {
258
- console.warn('No blocks found in plan', planRef);
217
+ console.warn('No blocks found in plan', systemId);
259
218
  return [];
260
219
  }
261
220
 
262
- let promises: Promise<ProcessInfo>[] = [];
221
+ let promises: Promise<InstanceInfo>[] = [];
263
222
  let errors = [];
264
223
  for (let blockInstance of Object.values(plan.spec.blocks as BlockInstance[])) {
265
224
  try {
266
- promises.push(this.createProcess(planRef, blockInstance.id));
225
+ promises.push(this.start(systemId, blockInstance.id));
267
226
  } catch (e) {
268
227
  errors.push(e);
269
228
  }
@@ -275,64 +234,76 @@ class InstanceManager {
275
234
  throw errors[0];
276
235
  }
277
236
 
278
- return settled.map((p) => (p.status === 'fulfilled' ? p.value : null)).filter((p) => !!p) as ProcessInfo[];
237
+ return settled.map((p) => (p.status === 'fulfilled' ? p.value : null)).filter((p) => !!p) as InstanceInfo[];
279
238
  }
280
239
 
281
- async _stopInstance(instance: InstanceInfo) {
282
- if (!instance.pid) {
240
+ public async stop(systemId: string, instanceId: string) {
241
+ systemId = normalizeKapetaUri(systemId);
242
+ const instance = this.getInstance(systemId, instanceId);
243
+ if (!instance) {
283
244
  return;
284
245
  }
285
246
 
286
- if (instance.status === 'stopped') {
247
+ if (instance.status === InstanceStatus.STOPPED) {
287
248
  return;
288
249
  }
289
250
 
251
+ if (instance.desiredStatus !== DesiredInstanceStatus.EXTERNAL) {
252
+ instance.desiredStatus = DesiredInstanceStatus.STOP;
253
+ }
254
+
255
+ instance.status = InstanceStatus.STOPPING;
256
+
257
+ this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
258
+ console.log('Stopping instance: %s::%s [desired: %s]', systemId, instanceId, instance.desiredStatus);
259
+ this.save();
260
+
290
261
  try {
291
262
  if (instance.type === 'docker') {
292
- const container = await containerManager.get(instance.pid as string);
263
+ const containerName = getBlockInstanceContainerName(instance.instanceId);
264
+ const container = await containerManager.getContainerByName(containerName);
293
265
  if (container) {
294
266
  try {
295
267
  await container.stop();
268
+ instance.status = InstanceStatus.STOPPED;
269
+ this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
270
+ this.save();
296
271
  } catch (e) {
297
272
  console.error('Failed to stop container', e);
298
273
  }
274
+ } else {
275
+ console.warn('Container not found', containerName);
299
276
  }
300
277
  return;
301
278
  }
279
+
280
+ if (!instance.pid) {
281
+ instance.status = InstanceStatus.STOPPED;
282
+ this.save();
283
+ return;
284
+ }
285
+
302
286
  process.kill(instance.pid as number, 'SIGTERM');
287
+ instance.status = InstanceStatus.STOPPED;
288
+ this.emitSystemEvent(systemId, EVENT_STATUS_CHANGED, instance);
289
+ this.save();
303
290
  } catch (e) {
304
291
  console.error('Failed to stop process', e);
305
292
  }
306
293
  }
307
294
 
308
- async stopAllForPlan(planRef: string) {
309
- if (this._processes[planRef]) {
310
- const promises = [];
311
- console.log('Stopping all processes for plan', planRef);
312
- for (let instance of Object.values(this._processes[planRef])) {
313
- promises.push(instance.stop());
314
- }
315
-
316
- await Promise.all(promises);
295
+ public async stopAllForPlan(systemId: string) {
296
+ systemId = normalizeKapetaUri(systemId);
297
+ const instancesForPlan = this._instances.filter((instance) => instance.systemId === systemId);
317
298
 
318
- this._processes[planRef] = {};
319
- }
320
-
321
- //Also stop instances not being maintained by the cluster service
322
- const instancesForPlan = this._instances.filter((instance) => instance.systemId === planRef);
323
-
324
- const promises = [];
325
- for (let instance of instancesForPlan) {
326
- promises.push(this._stopInstance(instance));
327
- }
328
-
329
- await Promise.all(promises);
299
+ return this.stopInstances(instancesForPlan);
330
300
  }
331
301
 
332
- async createProcess(planRef: string, instanceId: string): Promise<ProcessInfo> {
333
- const plan = await assetManager.getPlan(planRef, true);
302
+ public async start(systemId: string, instanceId: string): Promise<InstanceInfo> {
303
+ systemId = normalizeKapetaUri(systemId);
304
+ const plan = await assetManager.getPlan(systemId, true);
334
305
  if (!plan) {
335
- throw new Error('Plan not found: ' + planRef);
306
+ throw new Error('Plan not found: ' + systemId);
336
307
  }
337
308
 
338
309
  const blockInstance = plan.spec && plan.spec.blocks ? _.find(plan.spec.blocks, { id: instanceId }) : null;
@@ -340,42 +311,84 @@ class InstanceManager {
340
311
  throw new Error('Block instance not found: ' + instanceId);
341
312
  }
342
313
 
343
- const blockRef = blockInstance.block.ref;
314
+ const blockRef = normalizeKapetaUri(blockInstance.block.ref);
344
315
 
345
316
  const blockAsset = await assetManager.getAsset(blockRef, true);
346
- const instanceConfig = await configManager.getConfigForSection(planRef, instanceId);
347
-
348
317
  if (!blockAsset) {
349
318
  throw new Error('Block not found: ' + blockRef);
350
319
  }
351
320
 
352
- if (!this._processes[planRef]) {
353
- this._processes[planRef] = {};
321
+ const existingInstance = this.getInstance(systemId, instanceId);
322
+
323
+ if (existingInstance) {
324
+ if (existingInstance.status === InstanceStatus.READY) {
325
+ // Instance is already running
326
+ return existingInstance;
327
+ }
328
+
329
+ if (
330
+ existingInstance.desiredStatus === DesiredInstanceStatus.RUN &&
331
+ existingInstance.status === InstanceStatus.STARTING
332
+ ) {
333
+ // Internal instance is already starting - don't start it again
334
+ return existingInstance;
335
+ }
336
+
337
+ if (
338
+ existingInstance.owner === InstanceOwner.EXTERNAL &&
339
+ existingInstance.status === InstanceStatus.STARTING
340
+ ) {
341
+ // External instance is already starting - don't start it again
342
+ return existingInstance;
343
+ }
354
344
  }
355
345
 
356
- await this.stopProcess(planRef, instanceId);
357
- const type = blockAsset.version === 'local' ? 'local' : 'docker';
346
+ let instance: InstanceInfo = {
347
+ systemId,
348
+ instanceId,
349
+ ref: blockRef,
350
+ name: blockAsset.data.metadata.name,
351
+ desiredStatus: DesiredInstanceStatus.RUN,
352
+ owner: InstanceOwner.INTERNAL,
353
+ type: InstanceType.UNKNOWN,
354
+ status: InstanceStatus.STARTING,
355
+ startedAt: Date.now(),
356
+ };
357
+
358
+ console.log('Starting instance: %s::%s [desired: %s]', systemId, instanceId, instance.desiredStatus);
359
+ // Save the instance before starting it, so that we can track the status
360
+ await this.saveInternalInstance(instance);
361
+
362
+ if (existingInstance) {
363
+ // Check if the instance is already running - but after we've commmuicated the desired status
364
+ const currentStatus = await this.requestInstanceStatus(existingInstance);
365
+ if (currentStatus === InstanceStatus.READY) {
366
+ // Instance is already running
367
+ return existingInstance;
368
+ }
369
+ }
358
370
 
359
- const runner = new BlockInstanceRunner(planRef);
371
+ const instanceConfig = await configManager.getConfigForSection(systemId, instanceId);
372
+ const runner = new BlockInstanceRunner(systemId);
360
373
 
361
374
  const startTime = Date.now();
362
375
  try {
363
- const process = await runner.start(blockRef, instanceId, instanceConfig);
376
+ const processInfo = await runner.start(blockRef, instanceId, instanceConfig);
364
377
  //emit stdout/stderr via sockets
365
- process.output.on('data', (data: Buffer) => {
378
+ processInfo.output.on('data', (data: Buffer) => {
366
379
  const payload = {
367
380
  source: 'stdout',
368
381
  level: 'INFO',
369
382
  message: data.toString(),
370
383
  time: Date.now(),
371
384
  };
372
- this._emit(instanceId, EVENT_INSTANCE_LOG, payload);
385
+ this.emitInstanceEvent(systemId, instanceId, EVENT_INSTANCE_LOG, payload);
373
386
  });
374
387
 
375
- process.output.on('exit', (exitCode: number) => {
388
+ processInfo.output.on('exit', (exitCode: number) => {
376
389
  const timeRunning = Date.now() - startTime;
377
- const instance = this.getInstance(planRef, instanceId);
378
- if (instance?.status === STATUS_READY) {
390
+ const instance = this.getInstance(systemId, instanceId);
391
+ if (instance?.status === InstanceStatus.READY) {
379
392
  //It's already been running
380
393
  return;
381
394
  }
@@ -387,7 +400,13 @@ class InstanceManager {
387
400
  }
388
401
 
389
402
  if (exitCode !== 0 || timeRunning < MIN_TIME_RUNNING) {
390
- this._emit(blockInstance.id, EVENT_INSTANCE_EXITED, {
403
+ const instance = this.getInstance(systemId, instanceId);
404
+ if (instance) {
405
+ instance.status = InstanceStatus.FAILED;
406
+ this.save();
407
+ }
408
+
409
+ this.emitSystemEvent(systemId, EVENT_INSTANCE_EXITED, {
391
410
  error: 'Failed to start instance',
392
411
  status: EVENT_INSTANCE_EXITED,
393
412
  instanceId: blockInstance.id,
@@ -395,15 +414,20 @@ class InstanceManager {
395
414
  }
396
415
  });
397
416
 
398
- await this.registerInstance(planRef, instanceId, {
399
- type: process.type,
400
- pid: process.pid ?? -1,
417
+ instance.status = InstanceStatus.READY;
418
+
419
+ return this.saveInternalInstance({
420
+ ...instance,
421
+ type: processInfo.type,
422
+ pid: processInfo.pid ?? -1,
401
423
  health: null,
402
- portType: process.portType,
403
- status: STATUS_STARTING,
424
+ portType: processInfo.portType,
425
+ status: InstanceStatus.READY,
426
+ internal: {
427
+ logs: processInfo.logs,
428
+ output: processInfo.output,
429
+ },
404
430
  });
405
-
406
- return (this._processes[planRef][instanceId] = process);
407
431
  } catch (e: any) {
408
432
  console.warn('Failed to start instance', e);
409
433
  const logs: LogEntry[] = [
@@ -415,83 +439,288 @@ class InstanceManager {
415
439
  },
416
440
  ];
417
441
 
418
- await this.registerInstance(planRef, instanceId, {
419
- type: 'local',
442
+ const out = await this.saveInternalInstance({
443
+ ...instance,
444
+ type: InstanceType.LOCAL,
420
445
  pid: null,
421
446
  health: null,
422
447
  portType: DEFAULT_HEALTH_PORT_TYPE,
423
- status: STATUS_UNHEALTHY,
448
+ status: InstanceStatus.FAILED,
424
449
  });
425
450
 
426
- this._emit(instanceId, EVENT_INSTANCE_LOG, logs[0]);
451
+ this.emitInstanceEvent(systemId, instanceId, EVENT_INSTANCE_LOG, logs[0]);
427
452
 
428
- this._emit(blockInstance.id, EVENT_INSTANCE_EXITED, {
453
+ this.emitInstanceEvent(systemId, blockInstance.id, EVENT_INSTANCE_EXITED, {
429
454
  error: `Failed to start instance: ${e.message}`,
430
455
  status: EVENT_INSTANCE_EXITED,
431
456
  instanceId: blockInstance.id,
432
457
  });
433
458
 
434
- return (this._processes[planRef][instanceId] = {
435
- pid: -1,
436
- type,
437
- logs: () => logs,
438
- stop: () => Promise.resolve(),
439
- ref: blockRef,
440
- id: instanceId,
441
- name: blockInstance.name,
442
- output: new EventEmitter(),
443
- });
459
+ return out;
444
460
  }
445
461
  }
446
462
 
447
- /**
448
- *
449
- * @param {string} planRef
450
- * @param {string} instanceId
451
- * @return {ProcessInfo|null}
452
- */
453
- getProcessForInstance(planRef: string, instanceId: string) {
454
- if (!this._processes[planRef]) {
455
- return null;
456
- }
463
+ public async restart(systemId: string, instanceId: string) {
464
+ systemId = normalizeKapetaUri(systemId);
465
+ await this.stop(systemId, instanceId);
457
466
 
458
- return this._processes[planRef][instanceId];
467
+ return this.start(systemId, instanceId);
459
468
  }
460
469
 
461
- async restartIfRunning(planRef: string, instanceId: string) {
462
- if (!this._processes[planRef] || !this._processes[planRef][instanceId]) {
463
- return;
470
+ public async stopAll() {
471
+ return this.stopInstances(this._instances);
472
+ }
473
+
474
+ private async stopInstances(instances: InstanceInfo[]) {
475
+ const promises = instances.map((instance) => this.stop(instance.systemId, instance.instanceId));
476
+ await Promise.allSettled(promises);
477
+ this.save();
478
+ }
479
+
480
+ private save() {
481
+ try {
482
+ storageService.put(
483
+ 'instances',
484
+ this._instances.map((instance) => {
485
+ const copy = { ...instance };
486
+ delete copy.internal;
487
+ return copy;
488
+ })
489
+ );
490
+ } catch (e) {
491
+ console.error('Failed to save instances', this._instances, e);
492
+ }
493
+ }
494
+
495
+ private async checkInstances() {
496
+ //console.log('\n## Checking instances:');
497
+ let changed = false;
498
+ const all = [...this._instances];
499
+ while (all.length > 0) {
500
+ // Check a few instances at a time - docker doesn't like too many concurrent requests
501
+ const chunk = all.splice(0, 20);
502
+ const promises = chunk.map(async (instance) => {
503
+ if (!instance.systemId) {
504
+ return;
505
+ }
506
+
507
+ instance.systemId = normalizeKapetaUri(instance.systemId);
508
+ if (instance.ref) {
509
+ instance.ref = normalizeKapetaUri(instance.ref);
510
+ }
511
+
512
+ const newStatus = await this.requestInstanceStatus(instance);
513
+ /*
514
+ console.log('Check instance %s %s: [current: %s, new: %s, desired: %s]',
515
+ instance.systemId, instance.instanceId, instance.status, newStatus, instance.desiredStatus);
516
+ */
517
+
518
+ if (newStatus === InstanceStatus.BUSY) {
519
+ // If instance is busy we skip it
520
+ //console.log('Instance %s %s is busy', instance.systemId, instance.instanceId);
521
+ return;
522
+ }
523
+
524
+ if (
525
+ instance.startedAt !== undefined &&
526
+ newStatus === InstanceStatus.UNHEALTHY &&
527
+ instance.startedAt + HEALTH_CHECK_TIMEOUT < Date.now() &&
528
+ instance.status === InstanceStatus.STARTING
529
+ ) {
530
+ // If instance is starting we consider unhealthy an indication
531
+ // that it is still starting
532
+ //console.log('Instance %s %s is still starting', instance.systemId, instance.instanceId);
533
+ return;
534
+ }
535
+
536
+ if (instance.status !== newStatus) {
537
+ const oldStatus = instance.status;
538
+ const skipUpdate =
539
+ (newStatus === InstanceStatus.STOPPED && instance.status === InstanceStatus.FAILED) ||
540
+ ([InstanceStatus.READY, InstanceStatus.UNHEALTHY].includes(newStatus) &&
541
+ instance.status === InstanceStatus.STOPPING &&
542
+ instance.desiredStatus === DesiredInstanceStatus.STOP) ||
543
+ (newStatus === InstanceStatus.STOPPED &&
544
+ instance.status === InstanceStatus.STARTING &&
545
+ instance.desiredStatus === DesiredInstanceStatus.RUN);
546
+
547
+ if (!skipUpdate) {
548
+ const oldStatus = instance.status;
549
+ instance.status = newStatus;
550
+ console.log(
551
+ 'Instance status changed: %s %s: %s -> %s',
552
+ instance.systemId,
553
+ instance.instanceId,
554
+ oldStatus,
555
+ instance.status
556
+ );
557
+ this.emitSystemEvent(instance.systemId, EVENT_STATUS_CHANGED, instance);
558
+ changed = true;
559
+ }
560
+ }
561
+
562
+ if (instance.desiredStatus === DesiredInstanceStatus.RUN && newStatus === InstanceStatus.STOPPED) {
563
+ //If the instance is stopped but we want it to run, start it
564
+ try {
565
+ await this.start(instance.systemId, instance.instanceId);
566
+ } catch (e: any) {
567
+ console.warn('Failed to start instance', instance.systemId, instance.instanceId, e);
568
+ }
569
+ return;
570
+ }
571
+
572
+ if (instance.desiredStatus === DesiredInstanceStatus.STOP && newStatus === InstanceStatus.READY) {
573
+ //If the instance is running but we want it to stop, stop it
574
+ try {
575
+ await this.stop(instance.systemId, instance.instanceId);
576
+ } catch (e) {
577
+ console.warn('Failed to stop instance', instance.systemId, instance.instanceId, e);
578
+ }
579
+ return;
580
+ }
581
+
582
+ if (
583
+ instance.desiredStatus === DesiredInstanceStatus.RUN &&
584
+ instance.status !== newStatus &&
585
+ newStatus === InstanceStatus.UNHEALTHY
586
+ ) {
587
+ //If the instance is unhealthy, try to restart it
588
+ console.log('Restarting unhealthy instance', instance);
589
+ try {
590
+ await this.restart(instance.systemId, instance.instanceId);
591
+ } catch (e) {
592
+ console.warn('Failed to restart instance', instance.systemId, instance.instanceId, e);
593
+ }
594
+ }
595
+ });
596
+
597
+ await Promise.allSettled(promises);
598
+ }
599
+
600
+ if (changed) {
601
+ this.save();
464
602
  }
465
603
 
466
- // createProcess will stop the process first if it's running
467
- return this.createProcess(planRef, instanceId);
604
+ //console.log('\n##\n');
468
605
  }
469
606
 
470
- async stopProcess(planRef: string, instanceId: string) {
471
- if (!this._processes[planRef]) {
472
- return;
607
+ private async getExternalStatus(instance: InstanceInfo): Promise<InstanceStatus> {
608
+ if (instance.type === InstanceType.DOCKER) {
609
+ const containerName = getBlockInstanceContainerName(instance.instanceId);
610
+ const container = await containerManager.getContainerByName(containerName);
611
+ if (!container) {
612
+ // If the container doesn't exist, we consider the instance stopped
613
+ return InstanceStatus.STOPPED;
614
+ }
615
+ const state = await container.status();
616
+
617
+ if (state.Status === 'running') {
618
+ if (state.Health?.Status === 'healthy') {
619
+ return InstanceStatus.READY;
620
+ }
621
+ if (state.Health?.Status === 'starting') {
622
+ return InstanceStatus.STARTING;
623
+ }
624
+ if (state.Health?.Status === 'unhealthy') {
625
+ return InstanceStatus.UNHEALTHY;
626
+ }
627
+
628
+ return InstanceStatus.READY;
629
+ }
630
+ if (state.Status === 'created') {
631
+ return InstanceStatus.STARTING;
632
+ }
633
+
634
+ if (state.Status === 'exited' || state.Status === 'dead') {
635
+ return InstanceStatus.STOPPED;
636
+ }
637
+
638
+ if (state.Status === 'removing') {
639
+ return InstanceStatus.BUSY;
640
+ }
641
+
642
+ if (state.Status === 'restarting') {
643
+ return InstanceStatus.BUSY;
644
+ }
645
+
646
+ if (state.Status === 'paused') {
647
+ return InstanceStatus.BUSY;
648
+ }
649
+
650
+ return InstanceStatus.STOPPED;
473
651
  }
474
652
 
475
- if (this._processes[planRef][instanceId]) {
476
- try {
477
- await this._processes[planRef][instanceId].stop();
478
- } catch (e) {
479
- console.error('Failed to stop process for instance: %s -> %s', planRef, instanceId, e);
653
+ if (!instance.pid) {
654
+ return InstanceStatus.STOPPED;
655
+ }
656
+
657
+ //Otherwise its just a normal process.
658
+ //TODO: Handle for Windows
659
+ try {
660
+ if (process.kill(instance.pid as number, 0)) {
661
+ return InstanceStatus.READY;
662
+ }
663
+ } catch (err: any) {
664
+ if (err.code === 'EPERM') {
665
+ return InstanceStatus.READY;
480
666
  }
481
- delete this._processes[planRef][instanceId];
482
667
  }
668
+
669
+ return InstanceStatus.STOPPED;
483
670
  }
484
671
 
485
- async stopAllProcesses() {
486
- for (let processesForPlan of Object.values(this._processes)) {
487
- for (let processInfo of Object.values(processesForPlan)) {
488
- await processInfo.stop();
672
+ private async requestInstanceStatus(instance: InstanceInfo): Promise<InstanceStatus> {
673
+ const externalStatus = await this.getExternalStatus(instance);
674
+ if (instance.type === InstanceType.DOCKER) {
675
+ // For docker instances we can rely on docker status
676
+ return externalStatus;
677
+ }
678
+
679
+ if (externalStatus === InstanceStatus.STOPPED) {
680
+ return externalStatus;
681
+ }
682
+
683
+ if (!instance.health) {
684
+ //No health url means we assume it's healthy as soon as it's running
685
+ return InstanceStatus.READY;
686
+ }
687
+
688
+ return new Promise((resolve) => {
689
+ if (!instance.health) {
690
+ resolve(InstanceStatus.READY);
691
+ return;
489
692
  }
693
+ request(instance.health, (err, response) => {
694
+ if (err) {
695
+ resolve(InstanceStatus.UNHEALTHY);
696
+ return;
697
+ }
698
+
699
+ if (response.statusCode > 399) {
700
+ resolve(InstanceStatus.UNHEALTHY);
701
+ return;
702
+ }
703
+
704
+ resolve(InstanceStatus.READY);
705
+ });
706
+ });
707
+ }
708
+
709
+ private emitSystemEvent(systemId: string, type: string, payload: any) {
710
+ systemId = normalizeKapetaUri(systemId);
711
+ try {
712
+ socketManager.emit(`${systemId}/instances`, type, payload);
713
+ } catch (e: any) {
714
+ console.warn('Failed to emit instance event: %s', e.message);
490
715
  }
491
- this._processes = {};
716
+ }
492
717
 
493
- for (let instance of this._instances) {
494
- await this._stopInstance(instance);
718
+ private emitInstanceEvent(systemId: string, instanceId: string, type: string, payload: any) {
719
+ systemId = normalizeKapetaUri(systemId);
720
+ try {
721
+ socketManager.emit(`${systemId}/instances/${instanceId}`, type, payload);
722
+ } catch (e: any) {
723
+ console.warn('Failed to emit instance event: %s', e.message);
495
724
  }
496
725
  }
497
726
  }
@@ -499,5 +728,5 @@ class InstanceManager {
499
728
  export const instanceManager = new InstanceManager();
500
729
 
501
730
  process.on('exit', async () => {
502
- await instanceManager.stopAllProcesses();
731
+ await instanceManager.stopAll();
503
732
  });