svamp-cli 0.1.56 → 0.1.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api-Cegey1dh.mjs +140 -0
- package/dist/cli.mjs +32 -18
- package/dist/{commands-CraYxTcv.mjs → commands-DPZbSIdL.mjs} +1 -1
- package/dist/{commands-ZuFXrcot.mjs → commands-Dvftls28.mjs} +4 -157
- package/dist/commands-jfZbr1Qh.mjs +593 -0
- package/dist/index.mjs +1 -1
- package/dist/{package-4AMqauyI.mjs → package-J1yNZur7.mjs} +3 -2
- package/dist/{run-FHqgSPLk.mjs → run-BnUNgLTL.mjs} +1 -1
- package/dist/{run-sALUrMgm.mjs → run-CtrH9ayy.mjs} +601 -44
- package/dist/{tunnel-DhVAOdGd.mjs → tunnel-Dh1bJZ6R.mjs} +5 -2
- package/package.json +3 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import os__default from 'os';
|
|
2
|
-
import fs from 'fs/promises';
|
|
1
|
+
import{createRequire as _pkgrollCR}from"node:module";const require=_pkgrollCR(import.meta.url);import os__default from 'os';
|
|
2
|
+
import fs, { mkdir as mkdir$1, readdir, readFile, writeFile, unlink } from 'fs/promises';
|
|
3
3
|
import { readFileSync as readFileSync$1, mkdirSync, writeFileSync, existsSync as existsSync$1, copyFileSync, unlinkSync, watch, rmdirSync } from 'fs';
|
|
4
|
-
import { join, dirname, resolve, basename } from 'path';
|
|
4
|
+
import path, { join, dirname, resolve, basename } from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
import { spawn as spawn$1 } from 'child_process';
|
|
7
7
|
import { randomUUID as randomUUID$1 } from 'crypto';
|
|
@@ -632,6 +632,96 @@ async function registerMachineService(server, machineId, metadata, daemonState,
|
|
|
632
632
|
return { success: false, entries: [], path: targetPath, error: err.message };
|
|
633
633
|
}
|
|
634
634
|
},
|
|
635
|
+
// ── Process supervisor RPC ────────────────────────────────────────
|
|
636
|
+
/** List all supervised processes. */
|
|
637
|
+
processList: async (params = {}, context) => {
|
|
638
|
+
authorizeRequest(context, currentMetadata.sharing, "view");
|
|
639
|
+
if (!handlers.supervisor) return [];
|
|
640
|
+
return handlers.supervisor.list();
|
|
641
|
+
},
|
|
642
|
+
/** Add and start a new supervised process. */
|
|
643
|
+
processAdd: async (params, context) => {
|
|
644
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
645
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
646
|
+
return handlers.supervisor.add(params.spec);
|
|
647
|
+
},
|
|
648
|
+
/**
|
|
649
|
+
* Apply a spec declaratively (idempotent, like kubectl apply).
|
|
650
|
+
* Returns { action: 'created'|'updated'|'no-change', info: ProcessInfo }
|
|
651
|
+
*/
|
|
652
|
+
processApply: async (params, context) => {
|
|
653
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
654
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
655
|
+
return handlers.supervisor.apply(params.spec);
|
|
656
|
+
},
|
|
657
|
+
/**
|
|
658
|
+
* Partially update a process spec and restart it.
|
|
659
|
+
* Returns updated ProcessInfo.
|
|
660
|
+
*/
|
|
661
|
+
processUpdate: async (params, context) => {
|
|
662
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
663
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
664
|
+
return handlers.supervisor.update(params.idOrName, params.spec);
|
|
665
|
+
},
|
|
666
|
+
/** Get a single process by id or name. */
|
|
667
|
+
processGet: async (params, context) => {
|
|
668
|
+
authorizeRequest(context, currentMetadata.sharing, "view");
|
|
669
|
+
if (!handlers.supervisor) return void 0;
|
|
670
|
+
return handlers.supervisor.get(params.idOrName);
|
|
671
|
+
},
|
|
672
|
+
/** Start a stopped/failed process. */
|
|
673
|
+
processStart: async (params, context) => {
|
|
674
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
675
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
676
|
+
await handlers.supervisor.start(params.idOrName);
|
|
677
|
+
},
|
|
678
|
+
/** Stop a running process (keeps it in supervision). */
|
|
679
|
+
processStop: async (params, context) => {
|
|
680
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
681
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
682
|
+
await handlers.supervisor.stop(params.idOrName);
|
|
683
|
+
},
|
|
684
|
+
/** Restart a process. */
|
|
685
|
+
processRestart: async (params, context) => {
|
|
686
|
+
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
687
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
688
|
+
await handlers.supervisor.restart(params.idOrName);
|
|
689
|
+
},
|
|
690
|
+
/** Stop and permanently remove a process from supervision. */
|
|
691
|
+
processRemove: async (params, context) => {
|
|
692
|
+
authorizeRequest(context, currentMetadata.sharing, "admin");
|
|
693
|
+
if (!handlers.supervisor) throw new Error("Process supervisor not available");
|
|
694
|
+
await handlers.supervisor.remove(params.idOrName);
|
|
695
|
+
},
|
|
696
|
+
/** Get recent log lines for a process. */
|
|
697
|
+
processLogs: async (params, context) => {
|
|
698
|
+
authorizeRequest(context, currentMetadata.sharing, "view");
|
|
699
|
+
if (!handlers.supervisor) return [];
|
|
700
|
+
return handlers.supervisor.getLogs(params.idOrName, params.last ?? 50);
|
|
701
|
+
},
|
|
702
|
+
// ── Service group management (proxies to agent-sandbox API) ──────────
|
|
703
|
+
/** List all exposed service groups for this machine's namespace. */
|
|
704
|
+
serviceList: async (context) => {
|
|
705
|
+
authorizeRequest(context, currentMetadata.sharing, "view");
|
|
706
|
+
try {
|
|
707
|
+
const { listServiceGroups } = await import('./api-Cegey1dh.mjs');
|
|
708
|
+
return await listServiceGroups();
|
|
709
|
+
} catch (err) {
|
|
710
|
+
return [];
|
|
711
|
+
}
|
|
712
|
+
},
|
|
713
|
+
/** Get full details of a single service group (includes backends + health). */
|
|
714
|
+
serviceGet: async (params, context) => {
|
|
715
|
+
authorizeRequest(context, currentMetadata.sharing, "view");
|
|
716
|
+
const { getServiceGroup } = await import('./api-Cegey1dh.mjs');
|
|
717
|
+
return getServiceGroup(params.name);
|
|
718
|
+
},
|
|
719
|
+
/** Delete a service group. */
|
|
720
|
+
serviceDelete: async (params, context) => {
|
|
721
|
+
authorizeRequest(context, currentMetadata.sharing, "admin");
|
|
722
|
+
const { deleteServiceGroup } = await import('./api-Cegey1dh.mjs');
|
|
723
|
+
return deleteServiceGroup(params.name);
|
|
724
|
+
},
|
|
635
725
|
// WISE voice — create ephemeral token for OpenAI Realtime API
|
|
636
726
|
wiseCreateEphemeralToken: async (params, context) => {
|
|
637
727
|
authorizeRequest(context, currentMetadata.sharing, "interact");
|
|
@@ -3694,6 +3784,442 @@ function sanitizeEnvForSharing(env) {
|
|
|
3694
3784
|
return sanitized;
|
|
3695
3785
|
}
|
|
3696
3786
|
|
|
3787
|
+
const DEFAULT_PROBE_INTERVAL_S = 10;
const DEFAULT_PROBE_TIMEOUT_S = 5;
const DEFAULT_PROBE_FAILURE_THRESHOLD = 3;
const MAX_LOG_LINES = 300;
/** Delay between automatic restarts when a spec carries no usable `restartDelay`. */
const SUPERVISOR_DEFAULT_RESTART_DELAY_S = 1;
/** Time a child gets to exit after SIGTERM before SIGKILL is sent. */
const SUPERVISOR_KILL_GRACE_MS = 5e3;
/** Largest delay setTimeout honours (2^31 - 1 ms); larger values overflow and fire almost at once. */
const SUPERVISOR_MAX_TIMER_MS = 2147483647;

/**
 * Supervises child processes described by JSON specs.
 *
 * Each spec is persisted as `<persistDir>/<id>.json`. An in-memory entry holds the
 * spec, a mutable runtime state (status, pid, restart count, probe results), a
 * bounded log buffer and the timers (health probe, TTL, pending restart) that
 * belong to the current run of the process.
 */
class ProcessSupervisor {
  entries = /* @__PURE__ */ new Map();
  persistDir;

  /** @param {string} persistDir Directory in which process specs are stored. */
  constructor(persistDir) {
    this.persistDir = persistDir;
  }

  // ── Lifecycle ─────────────────────────────────────────────────────────────

  /** Must be called once after construction: creates the spec directory and loads persisted specs. */
  async init() {
    await mkdir$1(this.persistDir, { recursive: true });
    await this.loadAll();
  }

  /** Stop all managed processes (called on daemon shutdown). Failures are logged, never thrown. */
  async stopAll() {
    const ids = Array.from(this.entries.keys());
    await Promise.all(ids.map((id) => this.stop(id).catch((err) => {
      console.error(`[SUPERVISOR] Failed to stop process ${id}: ${err.message}`);
    })));
  }

  // ── Public API ────────────────────────────────────────────────────────────

  /**
   * Add a new supervised process and start it immediately.
   *
   * @param {object} spec Process spec; must contain a non-empty `name` and `command`.
   * @returns {Promise<object>} ProcessInfo ({ spec, state }) of the new process.
   * @throws {Error} If the spec is invalid, the name is already taken, or the spec cannot be persisted.
   */
  async add(spec) {
    this.assertValidSpec(spec);
    const clash = this.findByName(spec.name);
    if (clash) {
      throw new Error(`Process '${spec.name}' already exists (id: ${clash.spec.id}). Use restart or remove first.`);
    }
    const fullSpec = {
      ...spec,
      id: randomUUID$1(),
      createdAt: Date.now()
    };
    const entry = this.makeEntry(fullSpec);
    this.entries.set(fullSpec.id, entry);
    try {
      await this.persistSpec(fullSpec);
    } catch (err) {
      // Do not keep an entry in memory that would silently vanish on the next daemon start.
      this.entries.delete(fullSpec.id);
      throw err;
    }
    await this.startEntry(entry, false);
    return this.toInfo(entry);
  }

  /**
   * Start a stopped/failed process by id or name.
   *
   * @throws {Error} If the process is unknown or already running.
   */
  async start(idOrName) {
    const entry = this.require(idOrName);
    if (entry.child && !entry.stopping) {
      throw new Error(`Process '${entry.spec.name}' is already running`);
    }
    // A stop may still be in flight; wait for the old child to go away, and cancel
    // any pending auto-restart so the process is not spawned twice.
    await this.haltChild(entry);
    entry.stopping = false;
    await this.startEntry(entry, false);
  }

  /** Stop a running process. Does NOT remove it from supervision. */
  async stop(idOrName) {
    const entry = this.require(idOrName);
    await this.haltChild(entry);
    entry.state.status = "stopped";
    entry.state.stoppedAt = Date.now();
    entry.state.pid = void 0;
  }

  /** Restart a process (stop if running, then start again). Counts as one restart. */
  async restart(idOrName) {
    const entry = this.require(idOrName);
    // Always halt, even without a live child: a pending auto-restart timer must not
    // fire after this restart and spawn a second copy.
    await this.haltChild(entry);
    entry.stopping = false;
    entry.state.restartCount++;
    await this.startEntry(entry, false);
  }

  /** Stop the process and remove it from supervision (deletes the persisted spec). */
  async remove(idOrName) {
    const entry = this.require(idOrName);
    const id = entry.spec.id;
    await this.stop(id);
    this.entries.delete(id);
    await this.deleteSpec(id);
  }

  /** List all supervised processes as ProcessInfo objects. */
  list() {
    return Array.from(this.entries.values()).map((e) => this.toInfo(e));
  }

  /** Get a single process by id or name. Returns undefined if not found. */
  get(idOrName) {
    const entry = this.findByIdOrName(idOrName);
    return entry ? this.toInfo(entry) : void 0;
  }

  /**
   * Return the last N log lines for a process.
   *
   * @param {string} idOrName Process id or name.
   * @param {number} [last=50] Number of lines; zero or negative returns the whole buffer.
   * @returns {string[]} Log lines, oldest first; empty when the process is unknown.
   */
  getLogs(idOrName, last = 50) {
    const entry = this.findByIdOrName(idOrName);
    if (!entry) return [];
    const buf = entry.logBuffer;
    return last <= 0 ? [...buf] : buf.slice(-last);
  }

  /**
   * Apply a spec declaratively (idempotent, like `kubectl apply`).
   *
   * - If no process with spec.name exists → create and start (action: 'created')
   * - If exists and spec is unchanged → no-op (action: 'no-change')
   * - If exists and spec changed → update config + restart (action: 'updated')
   *
   * @returns {Promise<{action: string, info: object}>}
   */
  async apply(spec) {
    this.assertValidSpec(spec);
    const existing = this.findByName(spec.name);
    if (!existing) {
      const info = await this.add(spec);
      return { action: "created", info };
    }
    if (this.specsEqual(existing.spec, spec)) {
      return { action: "no-change", info: this.toInfo(existing) };
    }
    const updatedSpec = {
      ...spec,
      id: existing.spec.id,
      // preserve original creation time
      createdAt: existing.spec.createdAt
    };
    await this.replaceSpec(existing, updatedSpec);
    return { action: "updated", info: this.toInfo(existing) };
  }

  /**
   * Update a process's spec and restart it.
   * Merges the provided partial spec over the existing spec. `id` and `createdAt`
   * are never overwritten, because the id is both the map key and the file name
   * of the persisted spec.
   *
   * @throws {Error} If the process is unknown, the merged spec is invalid, or the
   *   new name belongs to another process.
   */
  async update(idOrName, partialSpec) {
    const entry = this.require(idOrName);
    const updatedSpec = {
      ...entry.spec,
      ...partialSpec,
      id: entry.spec.id,
      createdAt: entry.spec.createdAt
    };
    this.assertValidSpec(updatedSpec);
    const clash = this.findByName(updatedSpec.name);
    if (clash && clash !== entry) {
      throw new Error(`Process '${updatedSpec.name}' already exists (id: ${clash.spec.id}).`);
    }
    await this.replaceSpec(entry, updatedSpec);
    return this.toInfo(entry);
  }

  // ── Spec equality ─────────────────────────────────────────────────────────

  /**
   * Compare two specs for equality (ignoring id, createdAt, and runtime state).
   * Object keys are compared order-independently so that `{A, B}` and `{B, A}`
   * environments do not trigger a needless restart in apply().
   */
  specsEqual(a, b) {
    const pick = (s) => this.canonicalize({
      name: s.name,
      command: s.command,
      args: s.args,
      workdir: s.workdir,
      env: s.env ?? {},
      keepAlive: s.keepAlive,
      maxRestarts: s.maxRestarts,
      restartDelay: s.restartDelay,
      ttl: s.ttl,
      probe: s.probe ?? null,
      serviceGroup: s.serviceGroup,
      ports: s.ports ?? []
    });
    return JSON.stringify(pick(a)) === JSON.stringify(pick(b));
  }

  /** Return a copy of a JSON-like value with all object keys sorted recursively. */
  canonicalize(value) {
    if (Array.isArray(value)) return value.map((item) => this.canonicalize(item));
    if (value !== null && typeof value === "object") {
      return Object.fromEntries(
        Object.keys(value).sort().map((key) => [key, this.canonicalize(value[key])])
      );
    }
    return value;
  }

  // ── Persistence ───────────────────────────────────────────────────────────

  /** Load every `*.json` spec from persistDir; keepAlive processes are started right away. */
  async loadAll() {
    let files;
    try {
      files = await readdir(this.persistDir);
    } catch (err) {
      console.error(`[SUPERVISOR] Cannot read ${this.persistDir}: ${err.message}`);
      return;
    }
    let loaded = 0;
    for (const file of files) {
      if (!file.endsWith(".json")) continue;
      try {
        const raw = await readFile(path.join(this.persistDir, file), "utf-8");
        const spec = JSON.parse(raw);
        this.assertValidSpec(spec);
        if (typeof spec.id !== "string" || spec.id === "") {
          throw new Error("spec has no id");
        }
        const entry = this.makeEntry(spec);
        this.entries.set(spec.id, entry);
        if (spec.keepAlive) {
          await this.startEntry(entry, true);
        }
        loaded++;
      } catch (err) {
        console.error(`[SUPERVISOR] Failed to load process spec ${file}: ${err.message}`);
      }
    }
    if (loaded > 0) {
      console.log(`[SUPERVISOR] Restored ${loaded} supervised process(es)`);
    }
  }

  /** Write the spec to `<persistDir>/<id>.json`. */
  async persistSpec(spec) {
    const filePath = path.join(this.persistDir, `${spec.id}.json`);
    await writeFile(filePath, JSON.stringify(spec, null, 2), "utf-8");
  }

  /** Delete the persisted spec. A missing file is fine; other failures are logged, not thrown. */
  async deleteSpec(id) {
    try {
      await unlink(path.join(this.persistDir, `${id}.json`));
    } catch (err) {
      if (err?.code !== "ENOENT") {
        console.error(`[SUPERVISOR] Failed to delete spec ${id}: ${err.message}`);
      }
    }
  }

  // ── Internal helpers ──────────────────────────────────────────────────────

  /** Build a fresh in-memory entry (no child, empty log, status "pending") for a spec. */
  makeEntry(spec) {
    return {
      spec,
      state: {
        id: spec.id,
        status: "pending",
        restartCount: 0,
        consecutiveProbeFailures: 0
      },
      logBuffer: [],
      stopping: false
    };
  }

  /** Like findByIdOrName, but throws when nothing matches. */
  require(idOrName) {
    const entry = this.findByIdOrName(idOrName);
    if (!entry) throw new Error(`Process '${idOrName}' not found`);
    return entry;
  }

  /** Look an entry up by id first, then by name. */
  findByIdOrName(idOrName) {
    return this.entries.get(idOrName) ?? this.findByName(idOrName);
  }

  /** Find the entry whose spec has the given name, if any. */
  findByName(name) {
    for (const entry of this.entries.values()) {
      if (entry.spec.name === name) return entry;
    }
    return void 0;
  }

  /** Snapshot of an entry for callers: the spec plus a shallow copy of the state. */
  toInfo(entry) {
    return { spec: entry.spec, state: { ...entry.state } };
  }

  /** Reject specs that cannot possibly be spawned or looked up by name. */
  assertValidSpec(spec) {
    const valid = spec !== null && typeof spec === "object"
      && typeof spec.name === "string" && spec.name !== ""
      && typeof spec.command === "string" && spec.command !== "";
    if (!valid) {
      throw new Error("Process spec must be an object with a non-empty 'name' and 'command'");
    }
  }

  /** Return `value` when it is a finite number greater than zero, otherwise `fallback`. */
  positiveOr(value, fallback) {
    return Number.isFinite(value) && value > 0 ? value : fallback;
  }

  /**
   * Cancel all timers of the entry and terminate its child, if any.
   * Leaves `entry.stopping` set to true; callers that start the process again reset it.
   */
  async haltChild(entry) {
    entry.stopping = true;
    this.clearTimers(entry);
    const child = entry.child;
    if (!child) return;
    await this.killChild(child);
    if (entry.child === child) entry.child = void 0;
  }

  /** Swap in a new spec, persist it and restart the process with it. */
  async replaceSpec(entry, spec) {
    entry.spec = spec;
    await this.persistSpec(spec);
    await this.haltChild(entry);
    entry.stopping = false;
    await this.startEntry(entry, false);
  }

  // ── Process spawning ──────────────────────────────────────────────────────

  /**
   * Reset the runtime state and spawn the process.
   *
   * @param {object} entry Supervisor entry.
   * @param {boolean} onRestore True when called while loading persisted specs; a
   *   spec whose TTL already elapsed is then removed instead of started.
   */
  async startEntry(entry, onRestore) {
    const { spec } = entry;
    if (spec.ttl != null && onRestore) {
      const elapsedS = (Date.now() - spec.createdAt) / 1e3;
      if (elapsedS >= spec.ttl) {
        console.log(`[SUPERVISOR] Process '${spec.name}' TTL expired on restore, removing`);
        entry.state.status = "expired";
        this.entries.delete(spec.id);
        await this.deleteSpec(spec.id);
        return;
      }
    }
    entry.state.status = "starting";
    entry.state.pid = void 0;
    entry.state.startedAt = void 0;
    entry.state.consecutiveProbeFailures = 0;
    this.spawnProcess(entry);
  }

  /** Spawn the child for an entry and wire up logging, exit/error handling, probe and TTL. */
  spawnProcess(entry) {
    const { spec, state } = entry;
    // Timers left over from a previous run (e.g. a pending auto-restart) must not
    // outlive it, or the process could be spawned a second time.
    this.clearTimers(entry);
    try {
      const env = { ...process.env, ...spec.env ?? {} };
      const child = spawn$1(spec.command, spec.args, {
        cwd: spec.workdir,
        env,
        stdio: ["ignore", "pipe", "pipe"]
      });
      entry.child = child;
      // Without a pid the spawn failed asynchronously; the 'error' handler below reports it.
      state.status = child.pid === void 0 ? "starting" : "running";
      state.pid = child.pid;
      state.startedAt = Date.now();
      const appendLog = (chunk) => {
        for (const line of chunk.toString().split("\n")) {
          if (!line) continue;
          entry.logBuffer.push(line);
          if (entry.logBuffer.length > MAX_LOG_LINES) entry.logBuffer.shift();
        }
      };
      child.stdout?.on("data", appendLog);
      child.stderr?.on("data", appendLog);
      child.on("exit", (code, signal) => {
        // Ignore exits of children this entry no longer owns.
        if (entry.child === child) this.onProcessExit(entry, code, signal);
      });
      // Without this listener a failed spawn (e.g. ENOENT) is an unhandled 'error'
      // event and takes the whole daemon down.
      child.on("error", (err) => {
        console.error(`[SUPERVISOR] Process '${entry.spec.name}' error: ${err.message}`);
        // A child that never got a pid emits no 'exit', so account for it here.
        if (child.pid === void 0 && entry.child === child) {
          this.onProcessExit(entry, null, null);
        }
      });
      if (spec.probe) this.setupProbe(entry);
      if (spec.ttl != null) this.setupTTL(entry);
      if (child.pid !== void 0) {
        console.log(`[SUPERVISOR] Started '${spec.name}' pid=${child.pid}`);
      }
    } catch (err) {
      state.status = "failed";
      state.stoppedAt = Date.now();
      console.error(`[SUPERVISOR] Failed to spawn '${spec.name}': ${err.message}`);
    }
  }

  /**
   * Record that the entry's child is gone and, for keepAlive specs, schedule a restart.
   *
   * @param {object} entry Supervisor entry.
   * @param {number|null} code Exit code (null when killed by a signal or never spawned).
   * @param {string|null} signal Terminating signal, if any.
   */
  onProcessExit(entry, code, signal) {
    const { spec, state } = entry;
    entry.child = void 0;
    state.pid = void 0;
    state.stoppedAt = Date.now();
    this.clearTimers(entry);
    if (entry.stopping) {
      // Keep "expired" visible instead of overwriting it with "stopped".
      if (state.status !== "expired") state.status = "stopped";
      console.log(`[SUPERVISOR] Process '${spec.name}' stopped (code=${code})`);
      return;
    }
    const crashed = code !== 0 || signal !== null;
    state.status = crashed ? "failed" : "stopped";
    console.log(`[SUPERVISOR] Process '${spec.name}' exited (code=${code}, signal=${signal})`);
    if (!spec.keepAlive) return;
    if (spec.maxRestarts > 0 && state.restartCount >= spec.maxRestarts) {
      console.warn(`[SUPERVISOR] Process '${spec.name}' reached max restarts (${spec.maxRestarts}), not restarting`);
      state.status = "failed";
      return;
    }
    // A missing/invalid restartDelay would become NaN and turn into a ~1 ms restart loop.
    const delayS = Number.isFinite(spec.restartDelay) && spec.restartDelay >= 0
      ? spec.restartDelay
      : SUPERVISOR_DEFAULT_RESTART_DELAY_S;
    const delayMs = Math.min(delayS * 1e3, SUPERVISOR_MAX_TIMER_MS);
    console.log(`[SUPERVISOR] Scheduling restart of '${spec.name}' in ${delayMs}ms (restart #${state.restartCount + 1})`);
    entry.restartTimer = setTimeout(() => {
      entry.restartTimer = void 0;
      if (entry.stopping || entry.child) return;
      state.restartCount++;
      state.status = "starting";
      this.spawnProcess(entry);
    }, delayMs);
  }

  // ── Health probes ─────────────────────────────────────────────────────────

  /** Start the periodic HTTP health probe for an entry whose spec has a `probe`. */
  setupProbe(entry) {
    const intervalS = this.positiveOr(entry.spec.probe.interval, DEFAULT_PROBE_INTERVAL_S);
    entry.probeTimer = setInterval(() => {
      this.runHealthCheck(entry).catch((err) => {
        console.error(`[SUPERVISOR] Health check for '${entry.spec.name}' threw: ${err.message}`);
      });
    }, intervalS * 1e3);
  }

  /**
   * Probe `http://localhost:<probe.port><probe.path>` once and update the entry's
   * probe state; restarts the process after `failureThreshold` consecutive failures.
   */
  async runHealthCheck(entry) {
    const child = entry.child;
    const probe = entry.spec.probe;
    // Skip when there is nothing to probe or the previous probe has not finished yet.
    if (!child || !probe || entry.state.status !== "running" || entry.probeInFlight) return;
    const urlPath = probe.path ?? "/";
    const timeoutMs = this.positiveOr(probe.timeout, DEFAULT_PROBE_TIMEOUT_S) * 1e3;
    const threshold = probe.failureThreshold ?? DEFAULT_PROBE_FAILURE_THRESHOLD;
    const url = `http://localhost:${probe.port}${urlPath}`;
    entry.probeInFlight = true;
    let outcome;
    try {
      outcome = await this.fetchProbe(url, timeoutMs);
    } finally {
      entry.probeInFlight = false;
    }
    // The process may have been stopped or replaced while the request was pending;
    // a verdict about the old child must not count against the new one.
    if (entry.child !== child || entry.stopping) return;
    const { state } = entry;
    state.lastProbe = { ...outcome, timestamp: Date.now() };
    if (outcome.ok) {
      state.consecutiveProbeFailures = 0;
      return;
    }
    state.consecutiveProbeFailures++;
    const what = "error" in outcome
      ? `Probe ERROR '${entry.spec.name}' ${outcome.error}`
      : `Probe FAIL '${entry.spec.name}' HTTP ${outcome.statusCode}`;
    console.warn(`[SUPERVISOR] ${what} (${state.consecutiveProbeFailures}/${threshold})`);
    if (state.consecutiveProbeFailures >= threshold) {
      await this.triggerProbeRestart(entry);
    }
  }

  /**
   * Perform one probe request.
   *
   * @returns {Promise<object>} `{ ok, statusCode }` for an HTTP answer, or
   *   `{ ok: false, error }` when the request failed or timed out. Never rejects.
   */
  async fetchProbe(url, timeoutMs) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const resp = await fetch(url, { signal: controller.signal });
      // Only the status matters; discard the body so the connection is released.
      // A failure while discarding does not change the probe verdict.
      await resp.body?.cancel().catch(() => void 0);
      return { ok: resp.ok, statusCode: resp.status };
    } catch (err) {
      return { ok: false, error: err.message };
    } finally {
      clearTimeout(timer);
    }
  }

  /** Restart a process whose health probe kept failing. Errors are logged, not thrown. */
  async triggerProbeRestart(entry) {
    console.warn(`[SUPERVISOR] Restarting '${entry.spec.name}' due to probe failures`);
    entry.state.consecutiveProbeFailures = 0;
    this.clearTimers(entry);
    try {
      await this.restart(entry.spec.id);
    } catch (err) {
      console.error(`[SUPERVISOR] Probe-triggered restart failed for '${entry.spec.name}': ${err.message}`);
    }
  }

  // ── TTL ───────────────────────────────────────────────────────────────────

  /**
   * Arm the TTL timer. The TTL counts from `spec.createdAt`, not from process start.
   * Delays beyond the setTimeout limit are split into several timers.
   */
  setupTTL(entry) {
    const remainingMs = entry.spec.createdAt + entry.spec.ttl * 1e3 - Date.now();
    // `!(x > 0)` also catches NaN from a malformed ttl/createdAt.
    if (!(remainingMs > 0)) {
      this.expireProcess(entry);
      return;
    }
    console.log(`[SUPERVISOR] Process '${entry.spec.name}' TTL: expires in ${(remainingMs / 1e3).toFixed(0)}s`);
    const delayMs = Math.min(remainingMs, SUPERVISOR_MAX_TIMER_MS);
    entry.ttlTimer = setTimeout(() => {
      entry.ttlTimer = void 0;
      if (delayMs < remainingMs) {
        this.setupTTL(entry);
      } else {
        this.expireProcess(entry);
      }
    }, delayMs);
  }

  /** Mark the process expired, kill it and remove it (entry and persisted spec) in the background. */
  expireProcess(entry) {
    console.log(`[SUPERVISOR] Process '${entry.spec.name}' TTL expired`);
    entry.state.status = "expired";
    entry.stopping = true;
    this.clearTimers(entry);
    const cleanup = async () => {
      if (entry.child) await this.killChild(entry.child);
      this.entries.delete(entry.spec.id);
      await this.deleteSpec(entry.spec.id);
    };
    cleanup().catch((err) => console.error("[SUPERVISOR] TTL cleanup error:", err));
  }

  // ── Process kill helper ───────────────────────────────────────────────────

  /**
   * Terminate a child: SIGTERM first, SIGKILL after the grace period.
   * Resolves once the child has exited, or immediately when it never started or
   * is already gone (no 'exit' event would ever arrive in those cases).
   */
  killChild(child) {
    return new Promise((done) => {
      if (child.pid === void 0 || child.exitCode !== null || child.signalCode !== null) {
        done();
        return;
      }
      const forceKill = setTimeout(() => {
        child.kill("SIGKILL");
      }, SUPERVISOR_KILL_GRACE_MS);
      child.once("exit", () => {
        clearTimeout(forceKill);
        done();
      });
      child.kill("SIGTERM");
    });
  }

  // ── Timer cleanup ─────────────────────────────────────────────────────────

  /** Cancel the probe, TTL and restart timers of an entry. */
  clearTimers(entry) {
    if (entry.probeTimer) {
      clearInterval(entry.probeTimer);
      entry.probeTimer = void 0;
    }
    if (entry.ttlTimer) {
      clearTimeout(entry.ttlTimer);
      entry.ttlTimer = void 0;
    }
    if (entry.restartTimer) {
      clearTimeout(entry.restartTimer);
      entry.restartTimer = void 0;
    }
  }
}
|
|
4222
|
+
|
|
3697
4223
|
const __filename$1 = fileURLToPath(import.meta.url);
|
|
3698
4224
|
const __dirname$1 = dirname(__filename$1);
|
|
3699
4225
|
function loadEnvFile(path) {
|
|
@@ -4444,6 +4970,8 @@ async function startDaemon(options) {
|
|
|
4444
4970
|
logger.log(` Workspace: ${hyphaWorkspace || "(default)"}`);
|
|
4445
4971
|
logger.log(` Machine ID: ${machineId}`);
|
|
4446
4972
|
let server = null;
|
|
4973
|
+
const supervisor = new ProcessSupervisor(join(SVAMP_HOME, "processes"));
|
|
4974
|
+
await supervisor.init();
|
|
4447
4975
|
try {
|
|
4448
4976
|
logger.log("Connecting to Hypha server...");
|
|
4449
4977
|
server = await connectToHypha({
|
|
@@ -4461,6 +4989,7 @@ async function startDaemon(options) {
|
|
|
4461
4989
|
if (consecutiveHeartbeatFailures > 0) {
|
|
4462
4990
|
logger.log(`Hypha reconnection successful \u2014 services re-registered (resetting ${consecutiveHeartbeatFailures} failures)`);
|
|
4463
4991
|
consecutiveHeartbeatFailures = 0;
|
|
4992
|
+
lastReconnectAt = Date.now();
|
|
4464
4993
|
}
|
|
4465
4994
|
});
|
|
4466
4995
|
const pidToTrackedSession = /* @__PURE__ */ new Map();
|
|
@@ -6025,7 +6554,8 @@ The automated loop has finished. Review the progress above and let me know if yo
|
|
|
6025
6554
|
stopSession,
|
|
6026
6555
|
restartSession,
|
|
6027
6556
|
requestShutdown: () => requestShutdown("hypha-app"),
|
|
6028
|
-
getTrackedSessions: getCurrentChildren
|
|
6557
|
+
getTrackedSessions: getCurrentChildren,
|
|
6558
|
+
supervisor
|
|
6029
6559
|
}
|
|
6030
6560
|
);
|
|
6031
6561
|
logger.log(`Machine service registered: svamp-machine-${machineId}`);
|
|
@@ -6206,7 +6736,9 @@ The automated loop has finished. Review the progress above and let me know if yo
|
|
|
6206
6736
|
const HEARTBEAT_INTERVAL_MS = 1e4;
|
|
6207
6737
|
const PING_TIMEOUT_MS = 5e3;
|
|
6208
6738
|
const MAX_FAILURES = 60;
|
|
6739
|
+
const POST_RECONNECT_GRACE_MS = 2e4;
|
|
6209
6740
|
let heartbeatRunning = false;
|
|
6741
|
+
let lastReconnectAt = 0;
|
|
6210
6742
|
const heartbeatInterval = setInterval(async () => {
|
|
6211
6743
|
if (heartbeatRunning) return;
|
|
6212
6744
|
heartbeatRunning = true;
|
|
@@ -6222,20 +6754,14 @@ The automated loop has finished. Review the progress above and let me know if yo
|
|
|
6222
6754
|
try {
|
|
6223
6755
|
const installedVersion = readPackageVersion();
|
|
6224
6756
|
if (installedVersion !== "unknown" && installedVersion !== DAEMON_VERSION) {
|
|
6225
|
-
logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Self-restarting...`);
|
|
6226
6757
|
const supervised2 = process.env.SVAMP_SUPERVISED === "1";
|
|
6227
|
-
if (
|
|
6228
|
-
|
|
6229
|
-
|
|
6230
|
-
|
|
6231
|
-
|
|
6232
|
-
|
|
6233
|
-
env: process.env
|
|
6234
|
-
}).unref();
|
|
6235
|
-
await new Promise((r) => setTimeout(r, 500));
|
|
6758
|
+
if (supervised2) {
|
|
6759
|
+
logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Exiting for launchd restart...`);
|
|
6760
|
+
requestShutdown("version-update", `Updated ${DAEMON_VERSION} \u2192 ${installedVersion}`);
|
|
6761
|
+
return;
|
|
6762
|
+
} else {
|
|
6763
|
+
logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Run 'svamp daemon stop && svamp daemon start' to apply the update.`);
|
|
6236
6764
|
}
|
|
6237
|
-
requestShutdown("version-update", `Updated ${DAEMON_VERSION} \u2192 ${installedVersion}`);
|
|
6238
|
-
return;
|
|
6239
6765
|
}
|
|
6240
6766
|
} catch {
|
|
6241
6767
|
}
|
|
@@ -6252,36 +6778,65 @@ The automated loop has finished. Review the progress above and let me know if yo
|
|
|
6252
6778
|
}
|
|
6253
6779
|
}
|
|
6254
6780
|
}
|
|
6255
|
-
|
|
6256
|
-
|
|
6257
|
-
|
|
6258
|
-
|
|
6259
|
-
|
|
6260
|
-
|
|
6261
|
-
|
|
6262
|
-
|
|
6263
|
-
|
|
6264
|
-
|
|
6265
|
-
consecutiveHeartbeatFailures++;
|
|
6266
|
-
if (consecutiveHeartbeatFailures === 1) {
|
|
6267
|
-
logger.log(`Ping failed: ${err.message}`);
|
|
6268
|
-
} else if (consecutiveHeartbeatFailures % 6 === 0) {
|
|
6269
|
-
logger.log(`Connection down for ${consecutiveHeartbeatFailures * HEARTBEAT_INTERVAL_MS / 1e3}s (${consecutiveHeartbeatFailures}/${MAX_FAILURES})`);
|
|
6270
|
-
}
|
|
6271
|
-
if (consecutiveHeartbeatFailures === 1 || consecutiveHeartbeatFailures % 3 === 0) {
|
|
6272
|
-
const conn = server.rpc?._connection;
|
|
6273
|
-
const ws = conn?._websocket;
|
|
6274
|
-
if (ws?.readyState === 1) {
|
|
6275
|
-
logger.log("Force-closing stale WebSocket to trigger reconnection");
|
|
6276
|
-
try {
|
|
6277
|
-
ws.close(4e3, "Stale connection");
|
|
6278
|
-
} catch {
|
|
6781
|
+
const inGrace = lastReconnectAt > 0 && Date.now() - lastReconnectAt < POST_RECONNECT_GRACE_MS;
|
|
6782
|
+
if (!inGrace) {
|
|
6783
|
+
try {
|
|
6784
|
+
const pingStart = Date.now();
|
|
6785
|
+
await new Promise((resolve2, reject) => {
|
|
6786
|
+
const conn = server.rpc?._connection;
|
|
6787
|
+
const ws = conn?._websocket;
|
|
6788
|
+
if (!ws || ws.readyState !== 1) {
|
|
6789
|
+
reject(new Error("WebSocket not open"));
|
|
6790
|
+
return;
|
|
6279
6791
|
}
|
|
6792
|
+
const timer = setTimeout(() => {
|
|
6793
|
+
ws.removeEventListener("message", onMsg);
|
|
6794
|
+
reject(new Error("Ping timed out"));
|
|
6795
|
+
}, PING_TIMEOUT_MS);
|
|
6796
|
+
const onMsg = (event) => {
|
|
6797
|
+
try {
|
|
6798
|
+
const d = typeof event.data === "string" ? JSON.parse(event.data) : null;
|
|
6799
|
+
if (d?.type === "pong") {
|
|
6800
|
+
clearTimeout(timer);
|
|
6801
|
+
ws.removeEventListener("message", onMsg);
|
|
6802
|
+
resolve2();
|
|
6803
|
+
}
|
|
6804
|
+
} catch {
|
|
6805
|
+
}
|
|
6806
|
+
};
|
|
6807
|
+
ws.addEventListener("message", onMsg);
|
|
6808
|
+
ws.send(JSON.stringify({ type: "ping" }));
|
|
6809
|
+
});
|
|
6810
|
+
const pingMs = Date.now() - pingStart;
|
|
6811
|
+
if (pingMs > 1e3) {
|
|
6812
|
+
logger.log(`Slow ping: ${pingMs}ms`);
|
|
6813
|
+
}
|
|
6814
|
+
if (consecutiveHeartbeatFailures > 0) {
|
|
6815
|
+
logger.log(`Heartbeat recovered after ${consecutiveHeartbeatFailures} failures`);
|
|
6816
|
+
consecutiveHeartbeatFailures = 0;
|
|
6817
|
+
}
|
|
6818
|
+
} catch (err) {
|
|
6819
|
+
consecutiveHeartbeatFailures++;
|
|
6820
|
+
if (consecutiveHeartbeatFailures === 1) {
|
|
6821
|
+
logger.log(`Ping failed: ${err.message}`);
|
|
6822
|
+
} else if (consecutiveHeartbeatFailures % 6 === 0) {
|
|
6823
|
+
logger.log(`Connection down for ${consecutiveHeartbeatFailures * HEARTBEAT_INTERVAL_MS / 1e3}s (${consecutiveHeartbeatFailures}/${MAX_FAILURES})`);
|
|
6824
|
+
}
|
|
6825
|
+
if (consecutiveHeartbeatFailures === 1 || consecutiveHeartbeatFailures % 3 === 0) {
|
|
6826
|
+
const conn = server.rpc?._connection;
|
|
6827
|
+
const ws = conn?._websocket;
|
|
6828
|
+
if (ws?.readyState === 1) {
|
|
6829
|
+
logger.log("Force-closing stale WebSocket to trigger reconnection");
|
|
6830
|
+
try {
|
|
6831
|
+
ws.close(4e3, "Stale connection");
|
|
6832
|
+
} catch {
|
|
6833
|
+
}
|
|
6834
|
+
}
|
|
6835
|
+
}
|
|
6836
|
+
if (consecutiveHeartbeatFailures >= MAX_FAILURES) {
|
|
6837
|
+
logger.log(`Heartbeat failed ${MAX_FAILURES} times. Shutting down.`);
|
|
6838
|
+
requestShutdown("heartbeat-timeout", err.message);
|
|
6280
6839
|
}
|
|
6281
|
-
}
|
|
6282
|
-
if (consecutiveHeartbeatFailures >= MAX_FAILURES) {
|
|
6283
|
-
logger.log(`Heartbeat failed ${MAX_FAILURES} times. Shutting down.`);
|
|
6284
|
-
requestShutdown("heartbeat-timeout", err.message);
|
|
6285
6840
|
}
|
|
6286
6841
|
}
|
|
6287
6842
|
} finally {
|
|
@@ -6353,6 +6908,8 @@ The automated loop has finished. Review the progress above and let me know if yo
|
|
|
6353
6908
|
await debugService.disconnect();
|
|
6354
6909
|
} catch {
|
|
6355
6910
|
}
|
|
6911
|
+
await supervisor.stopAll().catch(() => {
|
|
6912
|
+
});
|
|
6356
6913
|
artifactSync.destroy();
|
|
6357
6914
|
try {
|
|
6358
6915
|
await server.disconnect();
|