amalgm 0.1.41 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -2
- package/lib/cli.js +76 -42
- package/lib/process-cleanup.js +239 -0
- package/lib/runtime-manifest.js +131 -0
- package/lib/service.js +44 -41
- package/lib/supervisor.js +224 -51
- package/package.json +2 -2
- package/runtime/scripts/amalgm-mcp/artifacts/supervisor.js +9 -0
- package/runtime/scripts/amalgm-mcp/events/executor.js +1 -1
- package/runtime/scripts/amalgm-mcp/tasks/executor.js +8 -0
- package/runtime/scripts/fs-watcher.js +45 -10
- package/runtime/scripts/local-gateway.js +4 -0
- package/runtime/scripts/port-monitor.js +54 -16
package/lib/service.js
CHANGED
|
@@ -15,6 +15,9 @@ const {
|
|
|
15
15
|
SERVICE_STATE_FILE,
|
|
16
16
|
SERVICE_STOP_FILE,
|
|
17
17
|
} = require('./paths');
|
|
18
|
+
const {
|
|
19
|
+
cleanupStaleRuntimeProcesses,
|
|
20
|
+
} = require('./process-cleanup');
|
|
18
21
|
|
|
19
22
|
const PACKAGE_VERSION = require('../package.json').version;
|
|
20
23
|
const SERVICE_LABEL = 'ai.amalgm.runtime';
|
|
@@ -250,6 +253,7 @@ function writePortableScript(localOnly) {
|
|
|
250
253
|
`LOCK_DIR=${shellQuote(path.join(SERVICE_DIR, 'lock'))}`,
|
|
251
254
|
`NODE_BIN=${shellQuote(process.execPath)}`,
|
|
252
255
|
`AMALGM_BIN=${shellQuote(AMALGM_BIN)}`,
|
|
256
|
+
`NODE_WATCHDOG=${shellQuote(SERVICE_NODE_SCRIPT_FILE)}`,
|
|
253
257
|
`DAEMON_LOG=${shellQuote(path.join(LOG_DIR, 'daemon.log'))}`,
|
|
254
258
|
`SERVICE_LOG=${shellQuote(SERVICE_LOG_FILE)}`,
|
|
255
259
|
`RESTART_DELAY=${shellQuote(String(process.env.AMALGM_SERVICE_RESTART_DELAY || '5'))}`,
|
|
@@ -261,42 +265,7 @@ function writePortableScript(localOnly) {
|
|
|
261
265
|
|
|
262
266
|
lines.push(
|
|
263
267
|
'mkdir -p "$AMALGM_DIR" "$LOG_DIR"',
|
|
264
|
-
'
|
|
265
|
-
' existing="$(cat "$PID_FILE" 2>/dev/null | sed -n \'s/.*"pid"[[:space:]]*:[[:space:]]*\\([0-9][0-9]*\\).*/\\1/p\')"',
|
|
266
|
-
' [ -n "$existing" ] || existing="$(cat "$PID_FILE" 2>/dev/null || true)"',
|
|
267
|
-
' if [ -n "$existing" ] && kill -0 "$existing" 2>/dev/null; then',
|
|
268
|
-
' exit 0',
|
|
269
|
-
' fi',
|
|
270
|
-
' rm -rf "$LOCK_DIR"',
|
|
271
|
-
' mkdir "$LOCK_DIR" || exit 1',
|
|
272
|
-
'fi',
|
|
273
|
-
'child_pid=""',
|
|
274
|
-
'cleanup() {',
|
|
275
|
-
' touch "$STOP_FILE" 2>/dev/null || true',
|
|
276
|
-
' if [ -n "$child_pid" ] && kill -0 "$child_pid" 2>/dev/null; then',
|
|
277
|
-
' kill "$child_pid" 2>/dev/null || true',
|
|
278
|
-
' wait "$child_pid" 2>/dev/null || true',
|
|
279
|
-
' fi',
|
|
280
|
-
' rm -f "$PID_FILE"',
|
|
281
|
-
' rm -rf "$LOCK_DIR"',
|
|
282
|
-
' exit 0',
|
|
283
|
-
'}',
|
|
284
|
-
'trap cleanup TERM INT HUP',
|
|
285
|
-
'printf \'{"pid":%s,"started_at":"%s","backend":"portable"}\\n\' "$$" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$PID_FILE"',
|
|
286
|
-
'rm -f "$STOP_FILE"',
|
|
287
|
-
'while [ ! -f "$STOP_FILE" ]; do',
|
|
288
|
-
' printf \'[%s] [service] starting amalgm run\\n\' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$SERVICE_LOG"',
|
|
289
|
-
' "$NODE_BIN" "$AMALGM_BIN" run >> "$DAEMON_LOG" 2>&1 &',
|
|
290
|
-
' child_pid=$!',
|
|
291
|
-
' wait "$child_pid"',
|
|
292
|
-
' code=$?',
|
|
293
|
-
' child_pid=""',
|
|
294
|
-
' printf \'[%s] [service] amalgm run exited code=%s\\n\' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$code" >> "$SERVICE_LOG"',
|
|
295
|
-
' [ -f "$STOP_FILE" ] && break',
|
|
296
|
-
' sleep "$RESTART_DELAY"',
|
|
297
|
-
'done',
|
|
298
|
-
'rm -f "$PID_FILE"',
|
|
299
|
-
'rm -rf "$LOCK_DIR"',
|
|
268
|
+
'exec "$NODE_BIN" "$NODE_WATCHDOG"',
|
|
300
269
|
);
|
|
301
270
|
|
|
302
271
|
fs.writeFileSync(SERVICE_SCRIPT_FILE, `${lines.join('\n')}\n`, { mode: 0o700 });
|
|
@@ -323,6 +292,8 @@ function writePortableNodeScript(localOnly) {
|
|
|
323
292
|
daemonLog: path.join(LOG_DIR, 'daemon.log'),
|
|
324
293
|
serviceLog: SERVICE_LOG_FILE,
|
|
325
294
|
restartDelayMs: Math.max(1000, Number(process.env.AMALGM_SERVICE_RESTART_DELAY || 5) * 1000),
|
|
295
|
+
maxRestartDelayMs: Math.max(5000, Number(process.env.AMALGM_SERVICE_MAX_RESTART_DELAY || 60) * 1000),
|
|
296
|
+
stableAfterMs: Math.max(10000, Number(process.env.AMALGM_SERVICE_STABLE_AFTER || 60) * 1000),
|
|
326
297
|
env: buildServiceEnv(localOnly),
|
|
327
298
|
};
|
|
328
299
|
const source = `'use strict';
|
|
@@ -330,11 +301,16 @@ function writePortableNodeScript(localOnly) {
|
|
|
330
301
|
const fs = require('fs');
|
|
331
302
|
const path = require('path');
|
|
332
303
|
const { spawn } = require('child_process');
|
|
304
|
+
const {
|
|
305
|
+
cleanupStaleRuntimeProcesses,
|
|
306
|
+
} = require(path.join(path.dirname(${JSON.stringify(AMALGM_BIN)}), '..', 'lib', 'process-cleanup'));
|
|
333
307
|
|
|
334
308
|
const config = ${JSON.stringify(config, null, 2)};
|
|
335
309
|
let child = null;
|
|
336
310
|
let stopping = false;
|
|
337
311
|
let exiting = false;
|
|
312
|
+
let restartDelayMs = config.restartDelayMs;
|
|
313
|
+
let childStartedAt = 0;
|
|
338
314
|
|
|
339
315
|
function ensureDir(dir) {
|
|
340
316
|
fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
|
|
@@ -411,21 +387,42 @@ function cleanup(exitCode = 0) {
|
|
|
411
387
|
if (child && !child.killed) {
|
|
412
388
|
try { child.kill('SIGKILL'); } catch {}
|
|
413
389
|
}
|
|
390
|
+
cleanupStaleRuntimeProcesses({
|
|
391
|
+
excludePids: [process.pid],
|
|
392
|
+
includeSupervisors: true,
|
|
393
|
+
logger: { log: (message) => append(config.serviceLog, message) },
|
|
394
|
+
});
|
|
414
395
|
finish(exitCode);
|
|
415
396
|
}, 5000);
|
|
416
397
|
child.once('exit', () => {
|
|
417
398
|
clearTimeout(timer);
|
|
399
|
+
cleanupStaleRuntimeProcesses({
|
|
400
|
+
excludePids: [process.pid],
|
|
401
|
+
includeSupervisors: true,
|
|
402
|
+
logger: { log: (message) => append(config.serviceLog, message) },
|
|
403
|
+
});
|
|
418
404
|
finish(exitCode);
|
|
419
405
|
});
|
|
420
406
|
return;
|
|
421
407
|
}
|
|
408
|
+
cleanupStaleRuntimeProcesses({
|
|
409
|
+
excludePids: [process.pid],
|
|
410
|
+
includeSupervisors: true,
|
|
411
|
+
logger: { log: (message) => append(config.serviceLog, message) },
|
|
412
|
+
});
|
|
422
413
|
finish(exitCode);
|
|
423
414
|
}
|
|
424
415
|
|
|
425
416
|
function launch() {
|
|
426
417
|
if (stopping || fs.existsSync(config.stopFile)) return cleanup(0);
|
|
418
|
+
cleanupStaleRuntimeProcesses({
|
|
419
|
+
excludePids: [process.pid],
|
|
420
|
+
includeSupervisors: true,
|
|
421
|
+
logger: { log: (message) => append(config.serviceLog, message) },
|
|
422
|
+
});
|
|
427
423
|
append(config.serviceLog, 'starting amalgm run');
|
|
428
424
|
const fd = fs.openSync(config.daemonLog, 'a');
|
|
425
|
+
childStartedAt = Date.now();
|
|
429
426
|
child = spawn(config.nodeBin, [config.amalgmBin, 'run'], {
|
|
430
427
|
cwd: process.env.HOME || process.cwd(),
|
|
431
428
|
env: { ...process.env, ...config.env },
|
|
@@ -434,10 +431,15 @@ function launch() {
|
|
|
434
431
|
});
|
|
435
432
|
try { fs.closeSync(fd); } catch {}
|
|
436
433
|
child.on('exit', (code, signal) => {
|
|
434
|
+
const runtimeMs = Date.now() - childStartedAt;
|
|
437
435
|
append(config.serviceLog, \`amalgm run exited code=\${code ?? ''} signal=\${signal ?? ''}\`);
|
|
438
436
|
child = null;
|
|
439
437
|
if (stopping || fs.existsSync(config.stopFile)) return cleanup(0);
|
|
440
|
-
|
|
438
|
+
if (runtimeMs >= config.stableAfterMs) restartDelayMs = config.restartDelayMs;
|
|
439
|
+
const delay = restartDelayMs;
|
|
440
|
+
restartDelayMs = Math.min(restartDelayMs * 2, config.maxRestartDelayMs);
|
|
441
|
+
append(config.serviceLog, \`restarting amalgm run in \${delay}ms\`);
|
|
442
|
+
setTimeout(launch, delay);
|
|
441
443
|
});
|
|
442
444
|
}
|
|
443
445
|
|
|
@@ -655,10 +657,8 @@ function startPortable() {
|
|
|
655
657
|
removeFile(SERVICE_STOP_FILE);
|
|
656
658
|
ensureDir(LOG_DIR, 0o700);
|
|
657
659
|
const logFd = fs.openSync(SERVICE_LOG_FILE, 'a');
|
|
658
|
-
const command = process.
|
|
659
|
-
const args =
|
|
660
|
-
? [state?.files?.nodeScript || SERVICE_NODE_SCRIPT_FILE]
|
|
661
|
-
: [state?.files?.script || SERVICE_SCRIPT_FILE];
|
|
660
|
+
const command = process.execPath;
|
|
661
|
+
const args = [state?.files?.nodeScript || SERVICE_NODE_SCRIPT_FILE];
|
|
662
662
|
const child = spawn(command, args, {
|
|
663
663
|
detached: true,
|
|
664
664
|
env: {
|
|
@@ -682,6 +682,7 @@ function stopDaemonProcess() {
|
|
|
682
682
|
const pid = readPidFile(PID_FILE);
|
|
683
683
|
if (!isPidRunning(pid)) {
|
|
684
684
|
removeFile(PID_FILE);
|
|
685
|
+
cleanupStaleRuntimeProcesses({ includeSupervisors: true });
|
|
685
686
|
return false;
|
|
686
687
|
}
|
|
687
688
|
try {
|
|
@@ -701,6 +702,7 @@ function stopDaemonProcess() {
|
|
|
701
702
|
}
|
|
702
703
|
}
|
|
703
704
|
removeFile(PID_FILE);
|
|
705
|
+
cleanupStaleRuntimeProcesses({ includeSupervisors: true });
|
|
704
706
|
return true;
|
|
705
707
|
}
|
|
706
708
|
|
|
@@ -720,6 +722,7 @@ function stopPortable() {
|
|
|
720
722
|
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 100);
|
|
721
723
|
}
|
|
722
724
|
removeFile(SERVICE_PID_FILE);
|
|
725
|
+
cleanupStaleRuntimeProcesses({ includeSupervisors: true });
|
|
723
726
|
}
|
|
724
727
|
|
|
725
728
|
function startService(options = {}) {
|
package/lib/supervisor.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const crypto = require('crypto');
|
|
4
4
|
const fs = require('fs');
|
|
5
|
+
const http = require('http');
|
|
5
6
|
const os = require('os');
|
|
6
7
|
const path = require('path');
|
|
7
8
|
const { spawn, spawnSync } = require('child_process');
|
|
@@ -26,8 +27,33 @@ const {
|
|
|
26
27
|
ensureAgentCommandShims,
|
|
27
28
|
ensureNativeBinaries,
|
|
28
29
|
} = require('../runtime/scripts/chat-core/tooling/native-binaries');
|
|
30
|
+
const {
|
|
31
|
+
cleanupStaleRuntimeProcesses,
|
|
32
|
+
isPidRunning,
|
|
33
|
+
supervisorProcesses,
|
|
34
|
+
} = require('./process-cleanup');
|
|
35
|
+
const {
|
|
36
|
+
runtimeLaunchServices,
|
|
37
|
+
runtimePortsState,
|
|
38
|
+
runtimeServiceScripts,
|
|
39
|
+
} = require('./runtime-manifest');
|
|
29
40
|
const PACKAGE_VERSION = require('../package.json').version;
|
|
30
41
|
|
|
42
|
+
const CHILD_RESTART_POLICY = {
|
|
43
|
+
initialDelayMs: 1000,
|
|
44
|
+
maxDelayMs: 30000,
|
|
45
|
+
stableAfterMs: 30000,
|
|
46
|
+
maxRestarts: 8,
|
|
47
|
+
windowMs: 60000,
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
const CHILD_HEALTH_POLICY = {
|
|
51
|
+
graceMs: 8000,
|
|
52
|
+
intervalMs: 10000,
|
|
53
|
+
timeoutMs: 2500,
|
|
54
|
+
maxFailures: 3,
|
|
55
|
+
};
|
|
56
|
+
|
|
31
57
|
function ensureDir(dir, mode = 0o700) {
|
|
32
58
|
fs.mkdirSync(dir, { recursive: true, mode });
|
|
33
59
|
try {
|
|
@@ -98,13 +124,7 @@ function runtimeEntry(relativePath) {
|
|
|
98
124
|
}
|
|
99
125
|
|
|
100
126
|
function assertRuntimePresent() {
|
|
101
|
-
const required =
|
|
102
|
-
runtimeEntry('scripts/amalgm-mcp/index.js'),
|
|
103
|
-
runtimeEntry('scripts/chat-server.js'),
|
|
104
|
-
runtimeEntry('scripts/fs-watcher.js'),
|
|
105
|
-
runtimeEntry('scripts/local-gateway.js'),
|
|
106
|
-
runtimeEntry('scripts/port-monitor.js'),
|
|
107
|
-
];
|
|
127
|
+
const required = runtimeServiceScripts().map((script) => runtimeEntry(script));
|
|
108
128
|
const missing = required.filter((file) => !fs.existsSync(file));
|
|
109
129
|
if (missing.length > 0) {
|
|
110
130
|
throw new Error(
|
|
@@ -172,7 +192,7 @@ function ensureRuntimeToken() {
|
|
|
172
192
|
return token;
|
|
173
193
|
}
|
|
174
194
|
|
|
175
|
-
function baseRuntimeEnv(record, ports) {
|
|
195
|
+
function baseRuntimeEnv(record, ports, options = {}) {
|
|
176
196
|
const workspaceRoot =
|
|
177
197
|
process.env.AMALGM_WORKSPACES_DIR ||
|
|
178
198
|
process.env.AMALGM_PROJECTS_DIR ||
|
|
@@ -180,11 +200,13 @@ function baseRuntimeEnv(record, ports) {
|
|
|
180
200
|
ensureDir(workspaceRoot);
|
|
181
201
|
const defaultCwd = process.env.AMALGM_DEFAULT_CWD || workspaceRoot;
|
|
182
202
|
const proxyToken = proxyTokenFromRecord(record);
|
|
203
|
+
const localOnly = !!options.localOnly || process.env.AMALGM_LOCAL_ONLY === 'true';
|
|
183
204
|
const env = {
|
|
184
205
|
...safeBaseProcessEnv(),
|
|
185
206
|
AMALGM_RUNTIME_SOURCE: 'npm',
|
|
186
207
|
AMALGM_RUNTIME_TOKEN: record?.runtime_token || process.env.AMALGM_RUNTIME_TOKEN || '',
|
|
187
208
|
AMALGM_LOCAL_MODE: 'true',
|
|
209
|
+
AMALGM_LOCAL_ONLY: localOnly ? 'true' : 'false',
|
|
188
210
|
AMALGM_BIND_HOST: process.env.AMALGM_BIND_HOST || '127.0.0.1',
|
|
189
211
|
AMALGM_AUTH_BACKUP_ENABLED: process.env.AMALGM_AUTH_BACKUP_ENABLED || 'false',
|
|
190
212
|
AMALGM_CREATE_AUTH_WATCH_DIRS: process.env.AMALGM_CREATE_AUTH_WATCH_DIRS || 'false',
|
|
@@ -216,40 +238,16 @@ function baseRuntimeEnv(record, ports) {
|
|
|
216
238
|
return env;
|
|
217
239
|
}
|
|
218
240
|
|
|
219
|
-
function serviceSpecs(record, ports) {
|
|
220
|
-
const env = baseRuntimeEnv(record, ports);
|
|
221
|
-
return
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
name: 'fs-watcher',
|
|
230
|
-
command: process.execPath,
|
|
231
|
-
args: [runtimeEntry('scripts/fs-watcher.js')],
|
|
232
|
-
env,
|
|
233
|
-
},
|
|
234
|
-
{
|
|
235
|
-
name: 'chat-server',
|
|
236
|
-
command: process.execPath,
|
|
237
|
-
args: [runtimeEntry('scripts/chat-server.js')],
|
|
238
|
-
env,
|
|
239
|
-
},
|
|
240
|
-
{
|
|
241
|
-
name: 'amalgm-mcp',
|
|
242
|
-
command: process.execPath,
|
|
243
|
-
args: [runtimeEntry('scripts/amalgm-mcp/index.js')],
|
|
244
|
-
env,
|
|
245
|
-
},
|
|
246
|
-
{
|
|
247
|
-
name: 'local-gateway',
|
|
248
|
-
command: process.execPath,
|
|
249
|
-
args: [runtimeEntry('scripts/local-gateway.js')],
|
|
250
|
-
env,
|
|
251
|
-
},
|
|
252
|
-
];
|
|
241
|
+
/**
 * Build the launch specification for every runtime service.
 *
 * Each spec runs the service's script with the current Node binary and shares
 * one environment object produced by `baseRuntimeEnv`. The `port-monitor`
 * service is the exception: it receives a copy of that environment with `PORT`
 * set to `process.env.PORT`, or `'0'` when that is unset/empty.
 *
 * @param {object} record - Stored runtime record, forwarded to `baseRuntimeEnv`.
 * @param {object} ports - Resolved ports, looked up by each service's `portKey`.
 * @param {object} [options={}] - Supervisor options, forwarded to `baseRuntimeEnv`.
 * @returns {Array<object>} One `{ name, command, args, env, port, healthPath }` per service.
 */
function serviceSpecs(record, ports, options = {}) {
  const sharedEnv = baseRuntimeEnv(record, ports, options);
  const specs = [];
  for (const service of runtimeLaunchServices()) {
    const args = [runtimeEntry(service.script)];
    let env = sharedEnv;
    if (service.name === 'port-monitor') {
      env = { ...sharedEnv, PORT: process.env.PORT || '0' };
    }
    specs.push({
      name: service.name,
      command: process.execPath,
      args,
      env,
      port: ports[service.portKey],
      healthPath: '/healthz',
    });
  }
  return specs;
}
|
|
254
252
|
|
|
255
253
|
function isPortFree(port, host = '127.0.0.1') {
|
|
@@ -314,13 +312,11 @@ async function pickPort(name, envName, preferred, used) {
|
|
|
314
312
|
|
|
315
313
|
async function resolveRuntimePorts() {
|
|
316
314
|
const used = new Set();
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
chatServer: await pickPort('chat-server', 'CHAT_SERVER_PORT', 8084, used),
|
|
323
|
-
};
|
|
315
|
+
const ports = {};
|
|
316
|
+
for (const service of runtimeLaunchServices()) {
|
|
317
|
+
ports[service.portKey] = await pickPort(service.name, service.envName, service.defaultPort, used);
|
|
318
|
+
}
|
|
319
|
+
return ports;
|
|
324
320
|
}
|
|
325
321
|
|
|
326
322
|
function openServiceLog(name) {
|
|
@@ -343,14 +339,150 @@ function writePrefixed(stream, name, chunk, consoleStream) {
|
|
|
343
339
|
}
|
|
344
340
|
}
|
|
345
341
|
|
|
342
|
+
/**
 * Merge `update` into the persisted runtime state and stamp `updated_at`.
 *
 * Nothing is written unless the stored state names this process as either
 * its `pid` or its `supervisor_pid`.
 *
 * @param {object} update - Top-level fields to overwrite in the stored state.
 * @returns {void}
 */
function updateRuntimeState(update) {
  const stored = readJson(RUNTIME_STATE_FILE, {});
  if (!stored) return;

  const ownedByThisProcess =
    stored.pid === process.pid || stored.supervisor_pid === process.pid;
  if (!ownedByThisProcess) return;

  // Later sources win: caller fields override stored ones, and the timestamp
  // always reflects this write.
  const merged = Object.assign({}, stored, update, {
    updated_at: new Date().toISOString(),
  });
  writeJsonSecret(RUNTIME_STATE_FILE, merged);
}
|
|
351
|
+
|
|
352
|
+
/**
 * Merge `update` into the entry named `name` in the persisted `services` list.
 *
 * Entries with a different name are kept as-is; if no entry matches, the list
 * is written back unchanged. Nothing happens unless the stored state names
 * this process as its `pid` or `supervisor_pid`.
 *
 * @param {string} name - Service name to match against each entry's `name`.
 * @param {object} update - Fields to overlay on the matching entry.
 * @returns {void}
 */
function appendServiceState(name, update) {
  const stored = readJson(RUNTIME_STATE_FILE, {});
  if (!stored) return;
  if (![stored.pid, stored.supervisor_pid].includes(process.pid)) return;

  const existing = Array.isArray(stored.services) ? stored.services : [];
  const next = [];
  for (const service of existing) {
    if (service.name === name) {
      next.push({ ...service, ...update });
    } else {
      next.push(service);
    }
  }
  updateRuntimeState({ services: next });
}
|
|
361
|
+
|
|
362
|
+
/**
 * Probe a managed service's HTTP health endpoint on 127.0.0.1.
 *
 * A spec without a port has nothing to probe and is reported healthy. A
 * completed response counts as healthy when its status is in [200, 500), so
 * only server errors, connection failures, timeouts and truncated responses
 * are treated as unhealthy.
 *
 * @param {object} spec - Service spec; reads `port` and optional `healthPath`
 *   (defaults to `/healthz`).
 * @param {number} [timeoutMs=CHILD_HEALTH_POLICY.timeoutMs] - Socket timeout
 *   passed to `http.request`.
 * @returns {Promise<boolean>} Resolves to the health verdict; never rejects.
 */
function checkServiceHealth(spec, timeoutMs = CHILD_HEALTH_POLICY.timeoutMs) {
  if (!spec.port) return Promise.resolve(true);
  return new Promise((resolve) => {
    const req = http.request(
      {
        host: '127.0.0.1',
        port: spec.port,
        path: spec.healthPath || '/healthz',
        method: 'GET',
        timeout: timeoutMs,
      },
      (res) => {
        // Only the status code matters; drain the body so 'end' can fire.
        res.resume();
        res.on('end', () => resolve(res.statusCode >= 200 && res.statusCode < 500));
        // If the connection drops after the headers arrive, 'end' never fires
        // and the request-level 'error'/'timeout' handlers stay silent.
        // Without these the promise would never settle and the caller's
        // health loop would stall. After a normal 'end' the extra resolve
        // calls are no-ops.
        res.on('error', () => resolve(false));
        res.on('close', () => resolve(false));
      },
    );
    req.on('timeout', () => {
      // The 'timeout' event does not abort the request by itself.
      req.destroy();
      resolve(false);
    });
    req.on('error', () => resolve(false));
    req.end();
  });
}
|
|
386
|
+
|
|
346
387
|
function createManagedProcess(spec, options) {
|
|
347
388
|
const { stream } = openServiceLog(spec.name);
|
|
348
389
|
let child = null;
|
|
349
390
|
let stopped = false;
|
|
350
391
|
let restartTimer = null;
|
|
392
|
+
let healthTimer = null;
|
|
393
|
+
let healthFailures = 0;
|
|
394
|
+
let startedAt = 0;
|
|
395
|
+
let restartDelayMs = CHILD_RESTART_POLICY.initialDelayMs;
|
|
396
|
+
let restartHistory = [];
|
|
397
|
+
let state = 'starting';
|
|
398
|
+
|
|
399
|
+
const setState = (nextState, extra = {}) => {
|
|
400
|
+
state = nextState;
|
|
401
|
+
appendServiceState(spec.name, {
|
|
402
|
+
status: state,
|
|
403
|
+
pid: child?.pid || null,
|
|
404
|
+
restart_count_window: restartHistory.length,
|
|
405
|
+
...extra,
|
|
406
|
+
});
|
|
407
|
+
};
|
|
408
|
+
|
|
409
|
+
const clearHealthTimer = () => {
|
|
410
|
+
if (healthTimer) clearTimeout(healthTimer);
|
|
411
|
+
healthTimer = null;
|
|
412
|
+
};
|
|
413
|
+
|
|
414
|
+
const scheduleHealthCheck = () => {
|
|
415
|
+
clearHealthTimer();
|
|
416
|
+
if (stopped) return;
|
|
417
|
+
healthTimer = setTimeout(async () => {
|
|
418
|
+
if (stopped || !child) return;
|
|
419
|
+
if (Date.now() - startedAt < CHILD_HEALTH_POLICY.graceMs) {
|
|
420
|
+
scheduleHealthCheck();
|
|
421
|
+
return;
|
|
422
|
+
}
|
|
423
|
+
const ok = await checkServiceHealth(spec);
|
|
424
|
+
if (stopped || !child) return;
|
|
425
|
+
if (ok) {
|
|
426
|
+
healthFailures = 0;
|
|
427
|
+
if (state !== 'running') setState('running');
|
|
428
|
+
} else {
|
|
429
|
+
healthFailures += 1;
|
|
430
|
+
writePrefixed(
|
|
431
|
+
stream,
|
|
432
|
+
spec.name,
|
|
433
|
+
`health check failed (${healthFailures}/${CHILD_HEALTH_POLICY.maxFailures})\n`,
|
|
434
|
+
options.foreground ? process.stderr : null,
|
|
435
|
+
);
|
|
436
|
+
if (healthFailures >= CHILD_HEALTH_POLICY.maxFailures) {
|
|
437
|
+
setState('restarting', { reason: 'health_check_failed' });
|
|
438
|
+
try {
|
|
439
|
+
child.kill('SIGTERM');
|
|
440
|
+
} catch {
|
|
441
|
+
// noop
|
|
442
|
+
}
|
|
443
|
+
const pid = child.pid;
|
|
444
|
+
setTimeout(() => {
|
|
445
|
+
if (child && child.pid === pid) {
|
|
446
|
+
try {
|
|
447
|
+
child.kill('SIGKILL');
|
|
448
|
+
} catch {
|
|
449
|
+
// noop
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
}, 5000);
|
|
453
|
+
return;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
scheduleHealthCheck();
|
|
457
|
+
}, CHILD_HEALTH_POLICY.intervalMs);
|
|
458
|
+
};
|
|
459
|
+
|
|
460
|
+
const canRestart = () => {
|
|
461
|
+
const now = Date.now();
|
|
462
|
+
restartHistory = restartHistory.filter((time) => now - time <= CHILD_RESTART_POLICY.windowMs);
|
|
463
|
+
if (restartHistory.length >= CHILD_RESTART_POLICY.maxRestarts) {
|
|
464
|
+
setState('degraded', {
|
|
465
|
+
reason: 'restart_limit_exceeded',
|
|
466
|
+
next_retry_at: null,
|
|
467
|
+
});
|
|
468
|
+
writePrefixed(
|
|
469
|
+
stream,
|
|
470
|
+
spec.name,
|
|
471
|
+
`entered degraded state after ${restartHistory.length} restarts in ${CHILD_RESTART_POLICY.windowMs}ms\n`,
|
|
472
|
+
options.foreground ? process.stderr : null,
|
|
473
|
+
);
|
|
474
|
+
return false;
|
|
475
|
+
}
|
|
476
|
+
restartHistory.push(now);
|
|
477
|
+
return true;
|
|
478
|
+
};
|
|
351
479
|
|
|
352
480
|
const launch = () => {
|
|
353
481
|
if (stopped) return;
|
|
482
|
+
clearHealthTimer();
|
|
483
|
+
healthFailures = 0;
|
|
484
|
+
startedAt = Date.now();
|
|
485
|
+
setState('starting');
|
|
354
486
|
child = spawn(spec.command, spec.args, {
|
|
355
487
|
cwd: RUNTIME_DIR,
|
|
356
488
|
env: spec.env,
|
|
@@ -364,6 +496,8 @@ function createManagedProcess(spec, options) {
|
|
|
364
496
|
`started pid=${child.pid} command=${[spec.command, ...spec.args].join(' ')}\n`,
|
|
365
497
|
options.foreground ? process.stdout : null,
|
|
366
498
|
);
|
|
499
|
+
setState('running', { started_at: new Date(startedAt).toISOString() });
|
|
500
|
+
scheduleHealthCheck();
|
|
367
501
|
|
|
368
502
|
child.stdout.on('data', (chunk) => {
|
|
369
503
|
writePrefixed(stream, spec.name, chunk, options.foreground ? process.stdout : null);
|
|
@@ -372,15 +506,35 @@ function createManagedProcess(spec, options) {
|
|
|
372
506
|
writePrefixed(stream, spec.name, chunk, options.foreground ? process.stderr : null);
|
|
373
507
|
});
|
|
374
508
|
child.on('exit', (code, signal) => {
|
|
509
|
+
clearHealthTimer();
|
|
375
510
|
writePrefixed(
|
|
376
511
|
stream,
|
|
377
512
|
spec.name,
|
|
378
513
|
`exited code=${code ?? ''} signal=${signal ?? ''}\n`,
|
|
379
514
|
options.foreground ? process.stderr : null,
|
|
380
515
|
);
|
|
516
|
+
const runtimeMs = Date.now() - startedAt;
|
|
381
517
|
child = null;
|
|
382
518
|
if (!stopped) {
|
|
383
|
-
|
|
519
|
+
if (runtimeMs >= CHILD_RESTART_POLICY.stableAfterMs) {
|
|
520
|
+
restartDelayMs = CHILD_RESTART_POLICY.initialDelayMs;
|
|
521
|
+
restartHistory = [];
|
|
522
|
+
}
|
|
523
|
+
if (!canRestart()) return;
|
|
524
|
+
const delay = restartDelayMs;
|
|
525
|
+
restartDelayMs = Math.min(restartDelayMs * 2, CHILD_RESTART_POLICY.maxDelayMs);
|
|
526
|
+
setState('restarting', {
|
|
527
|
+
exit_code: code ?? null,
|
|
528
|
+
exit_signal: signal || null,
|
|
529
|
+
next_retry_at: new Date(Date.now() + delay).toISOString(),
|
|
530
|
+
});
|
|
531
|
+
writePrefixed(
|
|
532
|
+
stream,
|
|
533
|
+
spec.name,
|
|
534
|
+
`restart scheduled in ${delay}ms\n`,
|
|
535
|
+
options.foreground ? process.stderr : null,
|
|
536
|
+
);
|
|
537
|
+
restartTimer = setTimeout(launch, delay);
|
|
384
538
|
}
|
|
385
539
|
});
|
|
386
540
|
};
|
|
@@ -391,6 +545,8 @@ function createManagedProcess(spec, options) {
|
|
|
391
545
|
stop() {
|
|
392
546
|
stopped = true;
|
|
393
547
|
if (restartTimer) clearTimeout(restartTimer);
|
|
548
|
+
clearHealthTimer();
|
|
549
|
+
setState('stopping');
|
|
394
550
|
if (child && !child.killed) {
|
|
395
551
|
try {
|
|
396
552
|
child.kill('SIGTERM');
|
|
@@ -408,6 +564,21 @@ async function startSupervisor(options = {}) {
|
|
|
408
564
|
ensureDir(AMALGM_DIR, 0o700);
|
|
409
565
|
ensureDir(LOG_DIR, 0o700);
|
|
410
566
|
|
|
567
|
+
const activeSupervisors = supervisorProcesses({ excludePids: [process.pid] });
|
|
568
|
+
if (activeSupervisors.length > 0) {
|
|
569
|
+
const pids = activeSupervisors
|
|
570
|
+
.filter((item) => isPidRunning(item.pid))
|
|
571
|
+
.map((item) => item.pid);
|
|
572
|
+
if (pids.length > 0) {
|
|
573
|
+
throw new Error(`Amalgm runtime is already running (pid ${pids.join(', ')}). Run \`amalgm stop\` first.`);
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
cleanupStaleRuntimeProcesses({
|
|
578
|
+
excludePids: [process.pid],
|
|
579
|
+
logger: options.foreground ? console : null,
|
|
580
|
+
});
|
|
581
|
+
|
|
411
582
|
const nativeResult = ensureNativeBinaries({
|
|
412
583
|
logger: console,
|
|
413
584
|
quiet: !options.foreground,
|
|
@@ -445,6 +616,7 @@ async function startSupervisor(options = {}) {
|
|
|
445
616
|
device_id: record?.device_id || null,
|
|
446
617
|
event_ref: record?.event_ref || null,
|
|
447
618
|
local_only: !!options.localOnly,
|
|
619
|
+
services: [],
|
|
448
620
|
started_at: new Date().toISOString(),
|
|
449
621
|
});
|
|
450
622
|
if (!storedRecord?.computer_id && options.foreground) {
|
|
@@ -459,7 +631,8 @@ async function startSupervisor(options = {}) {
|
|
|
459
631
|
FS_WATCHER_PORT: String(ports.fsWatcher),
|
|
460
632
|
PORT_MONITOR_PORT: String(ports.portMonitor),
|
|
461
633
|
});
|
|
462
|
-
|
|
634
|
+
updateRuntimeState(runtimePortsState(ports));
|
|
635
|
+
const managed = serviceSpecs(record, ports, options).map((spec) => createManagedProcess(spec, options));
|
|
463
636
|
const tunnels = [];
|
|
464
637
|
|
|
465
638
|
if (!options.localOnly && record?.tunnel_url && record?.tunnel_token) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "amalgm",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.44",
|
|
4
4
|
"description": "Amalgm local computer runtime: login, MCP, chat, events, previews, and tunnels.",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"private": false,
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"sync-runtime": "node ../../scripts/sync-npm-package-runtime.mjs",
|
|
18
18
|
"prepack": "node ../../scripts/sync-npm-package-runtime.mjs",
|
|
19
19
|
"pack:dry": "npm pack --dry-run",
|
|
20
|
-
"check": "node --check bin/amalgm.js && node --check lib/auth-store.js && node --check lib/cli.js && node --check lib/paths.js && node --check lib/service.js && node --check lib/supervisor.js && node --check lib/tunnel-chat.js && node --check lib/tunnel-events.js && node --check runtime/scripts/runtime-auth.js && node --check runtime/scripts/proxy-token-store.js && node --check runtime/scripts/local-gateway.js && node --check runtime/scripts/port-monitor.js && node --check runtime/scripts/fs-watcher.js && node --check runtime/scripts/chat-server.js && node --check runtime/scripts/chat-server/index.js && node --check runtime/scripts/chat-server/config.js && node --check runtime/scripts/chat-core/tooling/native-binaries.js && node --check runtime/scripts/chat-core/tooling/package-import.js && node --check runtime/scripts/amalgm-mcp/index.js && node --check runtime/scripts/amalgm-mcp/config.js"
|
|
20
|
+
"check": "node --check bin/amalgm.js && node --check lib/auth-store.js && node --check lib/cli.js && node --check lib/paths.js && node --check lib/process-cleanup.js && node --check lib/runtime-manifest.js && node --check lib/service.js && node --check lib/supervisor.js && node --check lib/tunnel-chat.js && node --check lib/tunnel-events.js && node --check runtime/scripts/runtime-auth.js && node --check runtime/scripts/proxy-token-store.js && node --check runtime/scripts/local-gateway.js && node --check runtime/scripts/port-monitor.js && node --check runtime/scripts/fs-watcher.js && node --check runtime/scripts/chat-server.js && node --check runtime/scripts/chat-server/index.js && node --check runtime/scripts/chat-server/config.js && node --check runtime/scripts/chat-core/tooling/native-binaries.js && node --check runtime/scripts/chat-core/tooling/package-import.js && node --check runtime/scripts/amalgm-mcp/index.js && node --check runtime/scripts/amalgm-mcp/config.js"
|
|
21
21
|
},
|
|
22
22
|
"engines": {
|
|
23
23
|
"node": ">=20"
|
|
@@ -26,6 +26,13 @@ const stopping = new Set();
|
|
|
26
26
|
const restartTimers = new Map();
|
|
27
27
|
|
|
28
28
|
const ARTIFACT_ENV_ALLOWLIST = [
|
|
29
|
+
'AMALGM_BIND_HOST',
|
|
30
|
+
'AMALGM_DIR',
|
|
31
|
+
'AMALGM_GATEWAY_PORT',
|
|
32
|
+
'AMALGM_MCP_PORT',
|
|
33
|
+
'AMALGM_RUNTIME_TOKEN',
|
|
34
|
+
'AMALGM_WORKSPACES_DIR',
|
|
35
|
+
'CHAT_SERVER_PORT',
|
|
29
36
|
'PATH',
|
|
30
37
|
'HOME',
|
|
31
38
|
'USER',
|
|
@@ -81,6 +88,8 @@ function buildEnv(artifact) {
|
|
|
81
88
|
AMALGM_ARTIFACT_ID: artifact.id,
|
|
82
89
|
AMALGM_ARTIFACT_REF: artifact.artifactRef,
|
|
83
90
|
AMALGM_ARTIFACT_URL: artifact.publicUrl,
|
|
91
|
+
AMALGM_CHAT_SERVER_URL: `http://127.0.0.1:${process.env.CHAT_SERVER_PORT || 8084}`,
|
|
92
|
+
AMALGM_MCP_URL: `http://127.0.0.1:${process.env.AMALGM_MCP_PORT || 8083}`,
|
|
84
93
|
};
|
|
85
94
|
}
|
|
86
95
|
|
|
@@ -198,7 +198,7 @@ async function executeArtifactEvent(artifactOrTrigger, eventDef, payload, opts =
|
|
|
198
198
|
if (hasSupabase()) {
|
|
199
199
|
supabasePatch('sessions', 'id', codeSessionId, {
|
|
200
200
|
last_message_at: new Date().toISOString(),
|
|
201
|
-
status: '
|
|
201
|
+
status: 'complete',
|
|
202
202
|
new_messages: true,
|
|
203
203
|
}).catch(() => {});
|
|
204
204
|
}
|
|
@@ -233,6 +233,14 @@ async function executeTask(task) {
|
|
|
233
233
|
console.log(
|
|
234
234
|
`[AmalgmMCP:Exec] Task ${task.id} ${status} in ${durationMs}ms (session: ${codeSessionId})`,
|
|
235
235
|
);
|
|
236
|
+
|
|
237
|
+
if (hasSupabase()) {
|
|
238
|
+
supabasePatch('sessions', 'id', codeSessionId, {
|
|
239
|
+
last_message_at: new Date().toISOString(),
|
|
240
|
+
status: 'complete',
|
|
241
|
+
new_messages: true,
|
|
242
|
+
}).catch(() => {});
|
|
243
|
+
}
|
|
236
244
|
} catch (err) {
|
|
237
245
|
if (err.name === 'AbortError') {
|
|
238
246
|
appendRunLog(task.id, {
|