svamp-cli 0.1.56 → 0.1.58

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
@@ -1,7 +1,7 @@
- import os__default from 'os';
- import fs from 'fs/promises';
+ import{createRequire as _pkgrollCR}from"node:module";const require=_pkgrollCR(import.meta.url);import os__default from 'os';
+ import fs, { mkdir as mkdir$1, readdir, readFile, writeFile, unlink } from 'fs/promises';
  import { readFileSync as readFileSync$1, mkdirSync, writeFileSync, existsSync as existsSync$1, copyFileSync, unlinkSync, watch, rmdirSync } from 'fs';
- import { join, dirname, resolve, basename } from 'path';
+ import path, { join, dirname, resolve, basename } from 'path';
  import { fileURLToPath } from 'url';
  import { spawn as spawn$1 } from 'child_process';
  import { randomUUID as randomUUID$1 } from 'crypto';
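
Note: the prelude injected on the first added line is pkgroll's CommonJS-interop shim, minified onto one line. Expanded, it is equivalent to the sketch below; the package.json require is only an illustrative use, not something this bundle necessarily does:

    import { createRequire } from "node:module";
    // Build a CommonJS-style require bound to this module's URL, so the
    // ESM bundle can still load CommonJS-only dependencies.
    const require = createRequire(import.meta.url);
    const pkg = require("./package.json"); // illustrative: synchronous JSON require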
@@ -632,6 +632,96 @@ async function registerMachineService(server, machineId, metadata, daemonState,
  return { success: false, entries: [], path: targetPath, error: err.message };
  }
  },
+ // ── Process supervisor RPC ────────────────────────────────────────
+ /** List all supervised processes. */
+ processList: async (params = {}, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "view");
+ if (!handlers.supervisor) return [];
+ return handlers.supervisor.list();
+ },
+ /** Add and start a new supervised process. */
+ processAdd: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ return handlers.supervisor.add(params.spec);
+ },
+ /**
+ * Apply a spec declaratively (idempotent, like kubectl apply).
+ * Returns { action: 'created'|'updated'|'no-change', info: ProcessInfo }
+ */
+ processApply: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ return handlers.supervisor.apply(params.spec);
+ },
+ /**
+ * Partially update a process spec and restart it.
+ * Returns updated ProcessInfo.
+ */
+ processUpdate: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ return handlers.supervisor.update(params.idOrName, params.spec);
+ },
+ /** Get a single process by id or name. */
+ processGet: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "view");
+ if (!handlers.supervisor) return void 0;
+ return handlers.supervisor.get(params.idOrName);
+ },
+ /** Start a stopped/failed process. */
+ processStart: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ await handlers.supervisor.start(params.idOrName);
+ },
+ /** Stop a running process (keeps it in supervision). */
+ processStop: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ await handlers.supervisor.stop(params.idOrName);
+ },
+ /** Restart a process. */
+ processRestart: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "interact");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ await handlers.supervisor.restart(params.idOrName);
+ },
+ /** Stop and permanently remove a process from supervision. */
+ processRemove: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "admin");
+ if (!handlers.supervisor) throw new Error("Process supervisor not available");
+ await handlers.supervisor.remove(params.idOrName);
+ },
+ /** Get recent log lines for a process. */
+ processLogs: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "view");
+ if (!handlers.supervisor) return [];
+ return handlers.supervisor.getLogs(params.idOrName, params.last ?? 50);
+ },
+ // ── Service group management (proxies to agent-sandbox API) ──────────
+ /** List all exposed service groups for this machine's namespace. */
+ serviceList: async (context) => {
+ authorizeRequest(context, currentMetadata.sharing, "view");
+ try {
+ const { listServiceGroups } = await import('./api-Cegey1dh.mjs');
+ return await listServiceGroups();
+ } catch (err) {
+ return [];
+ }
+ },
+ /** Get full details of a single service group (includes backends + health). */
+ serviceGet: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "view");
+ const { getServiceGroup } = await import('./api-Cegey1dh.mjs');
+ return getServiceGroup(params.name);
+ },
+ /** Delete a service group. */
+ serviceDelete: async (params, context) => {
+ authorizeRequest(context, currentMetadata.sharing, "admin");
+ const { deleteServiceGroup } = await import('./api-Cegey1dh.mjs');
+ return deleteServiceGroup(params.name);
+ },
  // WISE voice — create ephemeral token for OpenAI Realtime API
  wiseCreateEphemeralToken: async (params, context) => {
  authorizeRequest(context, currentMetadata.sharing, "interact");
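
Taken together, these additions expose a process-supervision and service-group RPC surface on the machine service, gated by the existing "view" / "interact" / "admin" sharing levels. A minimal client-side sketch, assuming a hypha-rpc websocket client; the server URL and machine id are placeholders, not values from the package:

    import { hyphaWebsocketClient } from "hypha-rpc";

    // Connect and look up the machine service (id shape taken from the daemon's log line).
    const server = await hyphaWebsocketClient.connectToServer({ server_url: "https://hypha.example.org" });
    const machine = await server.getService("svamp-machine-<machineId>");

    // Idempotently ensure a process is running, kubectl-apply style.
    const { action, info } = await machine.processApply({
      spec: { name: "web", command: "node", args: ["server.js"], keepAlive: true, restartDelay: 2, maxRestarts: 0 }
    });
    console.log(action, info.state.status); // 'created' | 'updated' | 'no-change'

    // Tail the last 20 captured log lines.
    const lines = await machine.processLogs({ idOrName: "web", last: 20 });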
@@ -3694,6 +3784,442 @@ function sanitizeEnvForSharing(env) {
  return sanitized;
  }

+ const DEFAULT_PROBE_INTERVAL_S = 10;
+ const DEFAULT_PROBE_TIMEOUT_S = 5;
+ const DEFAULT_PROBE_FAILURE_THRESHOLD = 3;
+ const MAX_LOG_LINES = 300;
+ class ProcessSupervisor {
+ entries = /* @__PURE__ */ new Map();
+ persistDir;
+ constructor(persistDir) {
+ this.persistDir = persistDir;
+ }
+ // ── Lifecycle ─────────────────────────────────────────────────────────────
+ /** Must be called once after construction to load persisted specs. */
+ async init() {
+ await mkdir$1(this.persistDir, { recursive: true });
+ await this.loadAll();
+ }
+ /** Stop all managed processes (called on daemon shutdown). */
+ async stopAll() {
+ const ids = Array.from(this.entries.keys());
+ await Promise.all(ids.map((id) => this.stop(id).catch(() => {
+ })));
+ }
+ // ── Public API ────────────────────────────────────────────────────────────
+ /**
+ * Add a new supervised process and start it immediately.
+ * Throws if a process with the same name already exists.
+ */
+ async add(spec) {
+ for (const entry2 of this.entries.values()) {
+ if (entry2.spec.name === spec.name) {
+ throw new Error(`Process '${spec.name}' already exists (id: ${entry2.spec.id}). Use restart or remove first.`);
+ }
+ }
+ const fullSpec = {
+ ...spec,
+ id: randomUUID$1(),
+ createdAt: Date.now()
+ };
+ const entry = this.makeEntry(fullSpec);
+ this.entries.set(fullSpec.id, entry);
+ await this.persistSpec(fullSpec);
+ await this.startEntry(
+ entry,
+ false
+ /* onRestore */
+ );
+ return this.toInfo(entry);
+ }
+ /** Start a stopped/failed process by id or name. */
+ async start(idOrName) {
+ const entry = this.require(idOrName);
+ if (entry.child && !entry.stopping) throw new Error(`Process '${entry.spec.name}' is already running`);
+ entry.stopping = false;
+ await this.startEntry(entry, false);
+ }
+ /** Stop a running process. Does NOT remove it from supervision. */
+ async stop(idOrName) {
+ const entry = this.require(idOrName);
+ entry.stopping = true;
+ this.clearTimers(entry);
+ if (entry.child) {
+ await this.killChild(entry.child);
+ entry.child = void 0;
+ }
+ entry.state.status = "stopped";
+ entry.state.stoppedAt = Date.now();
+ entry.state.pid = void 0;
+ }
+ /** Restart a process (stop if running, then start again). */
+ async restart(idOrName) {
+ const entry = this.require(idOrName);
+ if (entry.child) {
+ entry.stopping = true;
+ this.clearTimers(entry);
+ await this.killChild(entry.child);
+ entry.child = void 0;
+ }
+ entry.stopping = false;
+ entry.state.restartCount++;
+ await this.startEntry(entry, false);
+ }
+ /** Stop the process and remove it from supervision (deletes persisted spec). */
+ async remove(idOrName) {
+ const entry = this.require(idOrName);
+ const id = entry.spec.id;
+ await this.stop(entry.spec.name);
+ this.entries.delete(id);
+ await this.deleteSpec(id);
+ }
+ /** List all supervised processes. */
+ list() {
+ return Array.from(this.entries.values()).map((e) => this.toInfo(e));
+ }
+ /** Get a single process by id or name. Returns undefined if not found. */
+ get(idOrName) {
+ const entry = this.findByIdOrName(idOrName);
+ return entry ? this.toInfo(entry) : void 0;
+ }
+ /** Return the last N log lines for a process. */
+ getLogs(idOrName, last = 50) {
+ const entry = this.findByIdOrName(idOrName);
+ if (!entry) return [];
+ const buf = entry.logBuffer;
+ return last <= 0 ? [...buf] : buf.slice(-last);
+ }
+ /**
+ * Apply a spec declaratively (idempotent, like `kubectl apply`).
+ *
+ * - If no process with spec.name exists → create and start (action: 'created')
+ * - If exists and spec is unchanged → no-op (action: 'no-change')
+ * - If exists and spec changed → update config + restart (action: 'updated')
+ */
+ async apply(spec) {
+ const existing = Array.from(this.entries.values()).find((e) => e.spec.name === spec.name);
+ if (!existing) {
+ const info = await this.add(spec);
+ return { action: "created", info };
+ }
+ if (this.specsEqual(existing.spec, spec)) {
+ return { action: "no-change", info: this.toInfo(existing) };
+ }
+ const updatedSpec = {
+ ...spec,
+ id: existing.spec.id,
+ createdAt: existing.spec.createdAt
+ // preserve original creation time
+ };
+ existing.spec = updatedSpec;
+ await this.persistSpec(updatedSpec);
+ existing.stopping = true;
+ this.clearTimers(existing);
+ if (existing.child) {
+ await this.killChild(existing.child);
+ existing.child = void 0;
+ }
+ existing.stopping = false;
+ existing.state.status = "starting";
+ this.spawnProcess(existing);
+ return { action: "updated", info: this.toInfo(existing) };
+ }
+ /**
+ * Update a running process's spec and restart it.
+ * Merges the provided partial spec over the existing spec.
+ */
+ async update(idOrName, partialSpec) {
+ const entry = this.require(idOrName);
+ const updatedSpec = { ...entry.spec, ...partialSpec };
+ entry.spec = updatedSpec;
+ await this.persistSpec(updatedSpec);
+ entry.stopping = true;
+ this.clearTimers(entry);
+ if (entry.child) {
+ await this.killChild(entry.child);
+ entry.child = void 0;
+ }
+ entry.stopping = false;
+ entry.state.status = "starting";
+ this.spawnProcess(entry);
+ return this.toInfo(entry);
+ }
+ // ── Spec equality ─────────────────────────────────────────────────────────
+ /** Compare two specs for equality (ignoring id, createdAt, and runtime state). */
+ specsEqual(a, b) {
+ const pick = (s) => ({
+ name: s.name,
+ command: s.command,
+ args: JSON.stringify(s.args),
+ workdir: s.workdir,
+ env: JSON.stringify(s.env ?? {}),
+ keepAlive: s.keepAlive,
+ maxRestarts: s.maxRestarts,
+ restartDelay: s.restartDelay,
+ ttl: s.ttl,
+ probe: JSON.stringify(s.probe ?? null),
+ serviceGroup: s.serviceGroup,
+ ports: JSON.stringify(s.ports ?? [])
+ });
+ return JSON.stringify(pick(a)) === JSON.stringify(pick(b));
+ }
+ // ── Persistence ───────────────────────────────────────────────────────────
+ async loadAll() {
+ let files;
+ try {
+ files = await readdir(this.persistDir);
+ } catch {
+ return;
+ }
+ let loaded = 0;
+ for (const file of files) {
+ if (!file.endsWith(".json")) continue;
+ try {
+ const raw = await readFile(path.join(this.persistDir, file), "utf-8");
+ const spec = JSON.parse(raw);
+ const entry = this.makeEntry(spec);
+ this.entries.set(spec.id, entry);
+ if (spec.keepAlive) {
+ await this.startEntry(
+ entry,
+ true
+ /* onRestore */
+ );
+ }
+ loaded++;
+ } catch (err) {
+ console.error(`[SUPERVISOR] Failed to load process spec ${file}: ${err.message}`);
+ }
+ }
+ if (loaded > 0) {
+ console.log(`[SUPERVISOR] Restored ${loaded} supervised process(es)`);
+ }
+ }
+ async persistSpec(spec) {
+ const filePath = path.join(this.persistDir, `${spec.id}.json`);
+ await writeFile(filePath, JSON.stringify(spec, null, 2), "utf-8");
+ }
+ async deleteSpec(id) {
+ try {
+ await unlink(path.join(this.persistDir, `${id}.json`));
+ } catch {
+ }
+ }
+ // ── Internal helpers ──────────────────────────────────────────────────────
+ makeEntry(spec) {
+ return {
+ spec,
+ state: {
+ id: spec.id,
+ status: "pending",
+ restartCount: 0,
+ consecutiveProbeFailures: 0
+ },
+ logBuffer: [],
+ stopping: false
+ };
+ }
+ require(idOrName) {
+ const entry = this.findByIdOrName(idOrName);
+ if (!entry) throw new Error(`Process '${idOrName}' not found`);
+ return entry;
+ }
+ findByIdOrName(idOrName) {
+ return this.entries.get(idOrName) ?? Array.from(this.entries.values()).find((e) => e.spec.name === idOrName);
+ }
+ toInfo(entry) {
+ return { spec: entry.spec, state: { ...entry.state } };
+ }
+ // ── Process spawning ──────────────────────────────────────────────────────
+ async startEntry(entry, onRestore) {
+ const { spec } = entry;
+ if (spec.ttl !== void 0 && onRestore) {
+ const elapsedS = (Date.now() - spec.createdAt) / 1e3;
+ if (elapsedS >= spec.ttl) {
+ console.log(`[SUPERVISOR] Process '${spec.name}' TTL expired on restore, removing`);
+ entry.state.status = "expired";
+ this.entries.delete(spec.id);
+ await this.deleteSpec(spec.id);
+ return;
+ }
+ }
+ entry.state.status = "starting";
+ entry.state.pid = void 0;
+ entry.state.startedAt = void 0;
+ entry.state.consecutiveProbeFailures = 0;
+ this.spawnProcess(entry);
+ }
+ spawnProcess(entry) {
+ const { spec, state } = entry;
+ try {
+ const env = { ...process.env, ...spec.env ?? {} };
+ const child = spawn$1(spec.command, spec.args, {
+ cwd: spec.workdir,
+ env,
+ stdio: ["ignore", "pipe", "pipe"]
+ });
+ entry.child = child;
+ state.status = "running";
+ state.pid = child.pid;
+ state.startedAt = Date.now();
+ const appendLog = (chunk) => {
+ for (const line of chunk.toString().split("\n")) {
+ if (!line) continue;
+ entry.logBuffer.push(line);
+ if (entry.logBuffer.length > MAX_LOG_LINES) entry.logBuffer.shift();
+ }
+ };
+ child.stdout?.on("data", appendLog);
+ child.stderr?.on("data", appendLog);
+ child.on("exit", (code, signal) => this.onProcessExit(entry, code, signal));
+ if (spec.probe) this.setupProbe(entry);
+ if (spec.ttl !== void 0) this.setupTTL(entry);
+ console.log(`[SUPERVISOR] Started '${spec.name}' pid=${child.pid}`);
+ } catch (err) {
+ state.status = "failed";
+ state.stoppedAt = Date.now();
+ console.error(`[SUPERVISOR] Failed to spawn '${spec.name}': ${err.message}`);
+ }
+ }
+ onProcessExit(entry, code, signal) {
+ const { spec, state } = entry;
+ entry.child = void 0;
+ state.pid = void 0;
+ state.stoppedAt = Date.now();
+ this.clearTimers(entry);
+ if (entry.stopping) {
+ state.status = "stopped";
+ console.log(`[SUPERVISOR] Process '${spec.name}' stopped (code=${code})`);
+ return;
+ }
+ const crashed = code !== 0 || signal !== null;
+ state.status = crashed ? "failed" : "stopped";
+ console.log(`[SUPERVISOR] Process '${spec.name}' exited (code=${code}, signal=${signal})`);
+ if (!spec.keepAlive) return;
+ if (spec.maxRestarts > 0 && state.restartCount >= spec.maxRestarts) {
+ console.warn(`[SUPERVISOR] Process '${spec.name}' reached max restarts (${spec.maxRestarts}), not restarting`);
+ state.status = "failed";
+ return;
+ }
+ const delayMs = spec.restartDelay * 1e3;
+ console.log(`[SUPERVISOR] Scheduling restart of '${spec.name}' in ${delayMs}ms (restart #${state.restartCount + 1})`);
+ entry.restartTimer = setTimeout(() => {
+ if (entry.stopping) return;
+ state.restartCount++;
+ state.status = "starting";
+ this.spawnProcess(entry);
+ }, delayMs);
+ }
+ // ── Health probes ─────────────────────────────────────────────────────────
+ setupProbe(entry) {
+ const intervalMs = (entry.spec.probe.interval ?? DEFAULT_PROBE_INTERVAL_S) * 1e3;
+ entry.probeTimer = setInterval(() => {
+ this.runHealthCheck(entry).catch(() => {
+ });
+ }, intervalMs);
+ }
+ async runHealthCheck(entry) {
+ if (!entry.child || entry.state.status !== "running") return;
+ const probe = entry.spec.probe;
+ const urlPath = probe.path ?? "/";
+ const timeoutMs = (probe.timeout ?? DEFAULT_PROBE_TIMEOUT_S) * 1e3;
+ const threshold = probe.failureThreshold ?? DEFAULT_PROBE_FAILURE_THRESHOLD;
+ const url = `http://localhost:${probe.port}${urlPath}`;
+ try {
+ const controller = new AbortController();
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
+ let resp;
+ try {
+ resp = await fetch(url, { signal: controller.signal });
+ } finally {
+ clearTimeout(timer);
+ }
+ const ok = resp.ok;
+ entry.state.lastProbe = { ok, timestamp: Date.now(), statusCode: resp.status };
+ if (ok) {
+ entry.state.consecutiveProbeFailures = 0;
+ } else {
+ entry.state.consecutiveProbeFailures++;
+ console.warn(
+ `[SUPERVISOR] Probe FAIL '${entry.spec.name}' HTTP ${resp.status} (${entry.state.consecutiveProbeFailures}/${threshold})`
+ );
+ if (entry.state.consecutiveProbeFailures >= threshold) {
+ await this.triggerProbeRestart(entry);
+ }
+ }
+ } catch (err) {
+ entry.state.lastProbe = { ok: false, timestamp: Date.now(), error: err.message };
+ entry.state.consecutiveProbeFailures++;
+ console.warn(
+ `[SUPERVISOR] Probe ERROR '${entry.spec.name}' ${err.message} (${entry.state.consecutiveProbeFailures}/${threshold})`
+ );
+ if (entry.state.consecutiveProbeFailures >= threshold) {
+ await this.triggerProbeRestart(entry);
+ }
+ }
+ }
+ async triggerProbeRestart(entry) {
+ console.warn(`[SUPERVISOR] Restarting '${entry.spec.name}' due to probe failures`);
+ entry.state.consecutiveProbeFailures = 0;
+ this.clearTimers(entry);
+ try {
+ await this.restart(entry.spec.id);
+ } catch (err) {
+ console.error(`[SUPERVISOR] Probe-triggered restart failed for '${entry.spec.name}': ${err.message}`);
+ }
+ }
+ // ── TTL ───────────────────────────────────────────────────────────────────
+ setupTTL(entry) {
+ const elapsedS = (Date.now() - entry.spec.createdAt) / 1e3;
+ const remainingS = entry.spec.ttl - elapsedS;
+ if (remainingS <= 0) {
+ this.expireProcess(entry);
+ return;
+ }
+ console.log(`[SUPERVISOR] Process '${entry.spec.name}' TTL: expires in ${remainingS.toFixed(0)}s`);
+ entry.ttlTimer = setTimeout(() => this.expireProcess(entry), remainingS * 1e3);
+ }
+ expireProcess(entry) {
+ console.log(`[SUPERVISOR] Process '${entry.spec.name}' TTL expired`);
+ entry.state.status = "expired";
+ entry.stopping = true;
+ const cleanup = async () => {
+ if (entry.child) await this.killChild(entry.child);
+ this.entries.delete(entry.spec.id);
+ await this.deleteSpec(entry.spec.id).catch(() => {
+ });
+ };
+ cleanup().catch((err) => console.error("[SUPERVISOR] TTL cleanup error:", err));
+ }
+ // ── Process kill helper ───────────────────────────────────────────────────
+ killChild(child) {
+ return new Promise((resolve) => {
+ const done = () => resolve();
+ child.once("exit", done);
+ child.kill("SIGTERM");
+ const forceKill = setTimeout(() => {
+ child.kill("SIGKILL");
+ }, 5e3);
+ child.once("exit", () => clearTimeout(forceKill));
+ });
+ }
+ // ── Timer cleanup ─────────────────────────────────────────────────────────
+ clearTimers(entry) {
+ if (entry.probeTimer) {
+ clearInterval(entry.probeTimer);
+ entry.probeTimer = void 0;
+ }
+ if (entry.ttlTimer) {
+ clearTimeout(entry.ttlTimer);
+ entry.ttlTimer = void 0;
+ }
+ if (entry.restartTimer) {
+ clearTimeout(entry.restartTimer);
+ entry.restartTimer = void 0;
+ }
+ }
+ }
+
  const __filename$1 = fileURLToPath(import.meta.url);
  const __dirname$1 = dirname(__filename$1);
  function loadEnvFile(path) {
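
For orientation, a sketch of driving the new ProcessSupervisor directly, using only the spec fields the class actually reads (see specsEqual, spawnProcess, runHealthCheck, setupTTL above); the persist directory and spec values are illustrative, and all intervals, delays, timeouts, and TTLs are in seconds:

    const supervisor = new ProcessSupervisor("/tmp/svamp-processes"); // persist dir (illustrative)
    await supervisor.init(); // load persisted specs; restart keepAlive processes

    const { action } = await supervisor.apply({
      name: "api",
      command: "node",
      args: ["api.js"],
      keepAlive: true,   // respawn on crash
      restartDelay: 1,   // seconds between restarts
      maxRestarts: 5,    // 0 means unlimited
      ttl: 3600,         // expire and remove after an hour
      probe: { port: 3000, path: "/healthz", interval: 10, timeout: 5, failureThreshold: 3 }
    });
    console.log(action, supervisor.get("api")?.state.status);

    await supervisor.stopAll(); // SIGTERM each child, escalating to SIGKILL after 5s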
@@ -4444,6 +4970,8 @@ async function startDaemon(options) {
  logger.log(` Workspace: ${hyphaWorkspace || "(default)"}`);
  logger.log(` Machine ID: ${machineId}`);
  let server = null;
+ const supervisor = new ProcessSupervisor(join(SVAMP_HOME, "processes"));
+ await supervisor.init();
  try {
  logger.log("Connecting to Hypha server...");
  server = await connectToHypha({
@@ -4461,6 +4989,7 @@ async function startDaemon(options) {
  if (consecutiveHeartbeatFailures > 0) {
  logger.log(`Hypha reconnection successful \u2014 services re-registered (resetting ${consecutiveHeartbeatFailures} failures)`);
  consecutiveHeartbeatFailures = 0;
+ lastReconnectAt = Date.now();
  }
  });
  const pidToTrackedSession = /* @__PURE__ */ new Map();
@@ -6025,7 +6554,8 @@ The automated loop has finished. Review the progress above and let me know if yo
  stopSession,
  restartSession,
  requestShutdown: () => requestShutdown("hypha-app"),
- getTrackedSessions: getCurrentChildren
+ getTrackedSessions: getCurrentChildren,
+ supervisor
  }
  );
  logger.log(`Machine service registered: svamp-machine-${machineId}`);
@@ -6206,7 +6736,9 @@ The automated loop has finished. Review the progress above and let me know if yo
  const HEARTBEAT_INTERVAL_MS = 1e4;
  const PING_TIMEOUT_MS = 5e3;
  const MAX_FAILURES = 60;
+ const POST_RECONNECT_GRACE_MS = 2e4;
  let heartbeatRunning = false;
+ let lastReconnectAt = 0;
  const heartbeatInterval = setInterval(async () => {
  if (heartbeatRunning) return;
  heartbeatRunning = true;
@@ -6222,20 +6754,14 @@ The automated loop has finished. Review the progress above and let me know if yo
  try {
  const installedVersion = readPackageVersion();
  if (installedVersion !== "unknown" && installedVersion !== DAEMON_VERSION) {
- logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Self-restarting...`);
  const supervised2 = process.env.SVAMP_SUPERVISED === "1";
- if (!supervised2) {
- cleanupDaemonStateFile();
- const { spawn: spawnSelf } = await import('child_process');
- spawnSelf(process.argv[0], process.argv.slice(1), {
- detached: true,
- stdio: "ignore",
- env: process.env
- }).unref();
- await new Promise((r) => setTimeout(r, 500));
+ if (supervised2) {
+ logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Exiting for launchd restart...`);
+ requestShutdown("version-update", `Updated ${DAEMON_VERSION} \u2192 ${installedVersion}`);
+ return;
+ } else {
+ logger.log(`svamp-cli version changed on disk: ${DAEMON_VERSION} \u2192 ${installedVersion}. Run 'svamp daemon stop && svamp daemon start' to apply the update.`);
  }
- requestShutdown("version-update", `Updated ${DAEMON_VERSION} \u2192 ${installedVersion}`);
- return;
  }
  } catch {
  }
@@ -6252,36 +6778,65 @@ The automated loop has finished. Review the progress above and let me know if yo
  }
  }
  }
- try {
- await Promise.race([
- server.echo("ping"),
- new Promise((_, reject) => setTimeout(() => reject(new Error("Ping timed out")), PING_TIMEOUT_MS))
- ]);
- if (consecutiveHeartbeatFailures > 0) {
- logger.log(`Heartbeat recovered after ${consecutiveHeartbeatFailures} failures`);
- consecutiveHeartbeatFailures = 0;
- }
- } catch (err) {
- consecutiveHeartbeatFailures++;
- if (consecutiveHeartbeatFailures === 1) {
- logger.log(`Ping failed: ${err.message}`);
- } else if (consecutiveHeartbeatFailures % 6 === 0) {
- logger.log(`Connection down for ${consecutiveHeartbeatFailures * HEARTBEAT_INTERVAL_MS / 1e3}s (${consecutiveHeartbeatFailures}/${MAX_FAILURES})`);
- }
- if (consecutiveHeartbeatFailures === 1 || consecutiveHeartbeatFailures % 3 === 0) {
- const conn = server.rpc?._connection;
- const ws = conn?._websocket;
- if (ws?.readyState === 1) {
- logger.log("Force-closing stale WebSocket to trigger reconnection");
- try {
- ws.close(4e3, "Stale connection");
- } catch {
+ const inGrace = lastReconnectAt > 0 && Date.now() - lastReconnectAt < POST_RECONNECT_GRACE_MS;
+ if (!inGrace) {
+ try {
+ const pingStart = Date.now();
+ await new Promise((resolve2, reject) => {
+ const conn = server.rpc?._connection;
+ const ws = conn?._websocket;
+ if (!ws || ws.readyState !== 1) {
+ reject(new Error("WebSocket not open"));
+ return;
  }
+ const timer = setTimeout(() => {
+ ws.removeEventListener("message", onMsg);
+ reject(new Error("Ping timed out"));
+ }, PING_TIMEOUT_MS);
+ const onMsg = (event) => {
+ try {
+ const d = typeof event.data === "string" ? JSON.parse(event.data) : null;
+ if (d?.type === "pong") {
+ clearTimeout(timer);
+ ws.removeEventListener("message", onMsg);
+ resolve2();
+ }
+ } catch {
+ }
+ };
+ ws.addEventListener("message", onMsg);
+ ws.send(JSON.stringify({ type: "ping" }));
+ });
+ const pingMs = Date.now() - pingStart;
+ if (pingMs > 1e3) {
+ logger.log(`Slow ping: ${pingMs}ms`);
+ }
+ if (consecutiveHeartbeatFailures > 0) {
+ logger.log(`Heartbeat recovered after ${consecutiveHeartbeatFailures} failures`);
+ consecutiveHeartbeatFailures = 0;
+ }
+ } catch (err) {
+ consecutiveHeartbeatFailures++;
+ if (consecutiveHeartbeatFailures === 1) {
+ logger.log(`Ping failed: ${err.message}`);
+ } else if (consecutiveHeartbeatFailures % 6 === 0) {
+ logger.log(`Connection down for ${consecutiveHeartbeatFailures * HEARTBEAT_INTERVAL_MS / 1e3}s (${consecutiveHeartbeatFailures}/${MAX_FAILURES})`);
+ }
+ if (consecutiveHeartbeatFailures === 1 || consecutiveHeartbeatFailures % 3 === 0) {
+ const conn = server.rpc?._connection;
+ const ws = conn?._websocket;
+ if (ws?.readyState === 1) {
+ logger.log("Force-closing stale WebSocket to trigger reconnection");
+ try {
+ ws.close(4e3, "Stale connection");
+ } catch {
+ }
+ }
+ }
+ if (consecutiveHeartbeatFailures >= MAX_FAILURES) {
+ logger.log(`Heartbeat failed ${MAX_FAILURES} times. Shutting down.`);
+ requestShutdown("heartbeat-timeout", err.message);
  }
- }
- if (consecutiveHeartbeatFailures >= MAX_FAILURES) {
- logger.log(`Heartbeat failed ${MAX_FAILURES} times. Shutting down.`);
- requestShutdown("heartbeat-timeout", err.message);
  }
  }
  } finally {
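
The heartbeat change replaces the server.echo("ping") RPC with a ping/pong text frame sent directly on the underlying WebSocket, skips checks inside a 20-second post-reconnect grace window, and logs pings slower than one second. The transport-level ping, extracted here as a standalone helper purely for readability (a sketch, not code shipped in the package):

    // Resolve when a {type:"pong"} frame arrives; reject on timeout or closed socket.
    function wsPing(ws, timeoutMs = 5000) {
      return new Promise((resolve, reject) => {
        if (!ws || ws.readyState !== 1) return reject(new Error("WebSocket not open"));
        const timer = setTimeout(() => {
          ws.removeEventListener("message", onMsg);
          reject(new Error("Ping timed out"));
        }, timeoutMs);
        const onMsg = (event) => {
          try {
            const d = typeof event.data === "string" ? JSON.parse(event.data) : null;
            if (d?.type === "pong") {
              clearTimeout(timer);
              ws.removeEventListener("message", onMsg);
              resolve();
            }
          } catch {}
        };
        ws.addEventListener("message", onMsg);
        ws.send(JSON.stringify({ type: "ping" }));
      });
    }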
@@ -6353,6 +6908,8 @@ The automated loop has finished. Review the progress above and let me know if yo
  await debugService.disconnect();
  } catch {
  }
+ await supervisor.stopAll().catch(() => {
+ });
  artifactSync.destroy();
  try {
  await server.disconnect();
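
In the shutdown path above, supervisor.stopAll() runs before server.disconnect(), so supervised children receive an orderly SIGTERM (escalating to SIGKILL after 5 seconds via killChild) while the RPC connection is still alive. A hypothetical standalone wiring with the same ordering:

    // Hypothetical: the equivalent ordering in a bare signal handler.
    process.on("SIGTERM", async () => {
      await supervisor.stopAll().catch(() => {}); // stop children first
      await server.disconnect().catch(() => {});  // then drop the RPC connection
      process.exit(0);
    });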