Package not found. Please check the package name and try again.

@openparachute/hub 0.6.3-rc.3 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
/**
 * Detect + disable STALE per-module autostart units during the
 * detached→supervised cutover + teardown (hub#522, design
 * `parachute.computer/design/2026-06-01-hub-as-supervisor-unification.md` §7.2).
 *
 * THE BUG (validated hands-on on friends.parachute.computer): after a box
 * migrates to the supervised model, a leftover STANDALONE per-module autostart
 * unit from the pre-supervisor era — a systemd user unit `parachute-vault.service`
 * with `Restart=always`, or a launchd `computer.parachute.vault` LaunchAgent with
 * `KeepAlive` — keeps RESPAWNING an unsupervised vault that binds port 1940. The
 * supervised hub's own vault child then can't bind → EADDRINUSE crash-loop →
 * `crashed`, giving up. Killing the squatting PROCESS is whack-a-mole: the unit's
 * KeepAlive / Restart=always resurrects it within seconds, serving OLD code.
 *
 * THE FIX (the load-bearing half of #522): the cutover must DISABLE THE UNIT, not
 * just kill the process. Disabling deregisters the keep-alive intent so the
 * module stays down and the supervised hub owns the port. The complementary half
 * — the supervisor reclaiming its own port on EADDRINUSE at every start — is a
 * separate follow-on; THIS module is the unit-disable that stops the respawn at
 * the source.
 *
 * SCOPE + OWNERSHIP SAFETY (the hard constraint): we ONLY ever disable a unit
 * whose name EXACTLY matches `parachute-<short>.service` (systemd) or
 * `computer.parachute.<short>` (launchd) for a KNOWN module short
 * (`knownServices()` — vault / scribe / runner / surface / notes / channel). We
 * NEVER disable an arbitrary or unrecognized unit — an unknown unit is invisible
 * to this sweep by construction (we look up exact names, never enumerate-and-
 * match-loosely). On top of that we EXPLICITLY exclude the units the supervised
 * model legitimately owns:
 *   - the hub unit (`computer.parachute.hub` / `parachute-hub.service`), and
 *   - the cloudflared connector (`computer.parachute.cloudflared.*` /
 *     `parachute-cloudflared-*`, owned by `expose off --cloudflare`).
 * The skip-list reuses the canonical name constants (HUB_* + the cloudflared
 * prefixes) so it can't drift.
 *
 * BEHAVIOR per platform (reuses the `ManagedUnitDeps` seam — `which` / `run`):
 *   - systemd (Linux): for each known short, query the USER unit
 *     `systemctl --user is-enabled parachute-<short>.service`. If it reads
 *     enabled (`enabled` / `enabled-runtime` / `static` / `alias`/`indirect`-ish)
 *     → `systemctl --user disable --now parachute-<short>.service`. A SYSTEM-level
 *     unit of the same name (detected via `systemctl is-enabled` without --user)
 *     is NOT touched (migrate has no sudo) — we WARN with the exact manual
 *     `sudo systemctl disable --now …` command instead.
 *   - launchd (Mac): for each known short, `launchctl print
 *     gui/<uid>/computer.parachute.<short>`; if the label is loaded → `launchctl
 *     bootout gui/<uid>/computer.parachute.<short>`.
 *
 * IDEMPOTENT: a unit that's already disabled / not-enabled / absent is a clean
 * no-op (we never report disabling it). NON-FATAL: a disable that fails (perms,
 * launchctl quirk) WARNS + continues — it never aborts the cutover. EVERYTHING
 * behind the injectable `ManagedUnitDeps` seam so tests never touch real
 * systemctl/launchctl.
 */
54
+
55
+ import {
56
+ CLOUDFLARED_LAUNCHD_LABEL_PREFIX,
57
+ CLOUDFLARED_SYSTEMD_UNIT_PREFIX,
58
+ } from "./cloudflare/connector-service.ts";
59
+ import {
60
+ HUB_LAUNCHD_LABEL,
61
+ HUB_SYSTEMD_UNIT_NAME,
62
+ type ManagedUnitDeps,
63
+ defaultManagedUnitDeps,
64
+ } from "./managed-unit.ts";
65
+ import { knownServices } from "./service-spec.ts";
66
+
/**
 * Build the systemd unit name for a module short.
 *
 * @param short - Module short name, e.g. `vault`.
 * @returns The unit name, e.g. `vault` → `parachute-vault.service`.
 */
export function moduleSystemdUnitName(short: string): string {
  return `parachute-${short}.service`;
}
71
+
/**
 * Build the launchd label for a module short.
 *
 * @param short - Module short name, e.g. `vault`.
 * @returns The label, e.g. `vault` → `computer.parachute.vault`.
 */
export function moduleLaunchdLabel(short: string): string {
  return `computer.parachute.${short}`;
}
76
+
/**
 * Is this systemd unit name one the supervised model legitimately owns (and the
 * sweep must therefore NEVER disable)? That is the hub unit plus any cloudflared
 * connector unit. Reuses the canonical name constants so the skip can't drift.
 *
 * @param unitName - Full systemd unit name to classify.
 * @returns `true` when the name equals `HUB_SYSTEMD_UNIT_NAME` or starts with
 *   `CLOUDFLARED_SYSTEMD_UNIT_PREFIX`.
 */
function isProtectedSystemdUnit(unitName: string): boolean {
  return unitName === HUB_SYSTEMD_UNIT_NAME || unitName.startsWith(CLOUDFLARED_SYSTEMD_UNIT_PREFIX);
}
85
+
/**
 * Is this launchd label one the supervised model legitimately owns? That is the
 * hub label plus any cloudflared connector label
 * (`computer.parachute.cloudflared.*`).
 *
 * @param label - launchd label to classify.
 * @returns `true` when the label equals `HUB_LAUNCHD_LABEL`, equals the bare
 *   `CLOUDFLARED_LAUNCHD_LABEL_PREFIX`, or extends that prefix with a further
 *   dot-separated segment.
 */
function isProtectedLaunchdLabel(label: string): boolean {
  return (
    label === HUB_LAUNCHD_LABEL ||
    label === CLOUDFLARED_LAUNCHD_LABEL_PREFIX ||
    // Require the "." separator so only labels nested under the prefix match.
    label.startsWith(`${CLOUDFLARED_LAUNCHD_LABEL_PREFIX}.`)
  );
}
97
+
/**
 * The module shorts whose stale standalone autostart units the sweep targets.
 * Derived from `knownServices()` (the canonical FIRST_PARTY_FALLBACKS +
 * KNOWN_MODULES list — vault / scribe / runner / surface / notes / channel), so
 * a future module is covered automatically. `hub` is deliberately NOT in that
 * list — the hub unit is the supervised model itself; we never disable it. As a
 * defensive double-check we also drop any short whose derived unit name lands in
 * the protected skip-list (so the sweep can never disable the hub / cloudflared
 * even if a future short collided).
 *
 * @returns The known shorts whose derived systemd unit name AND launchd label
 *   are both unprotected.
 */
export function targetModuleShorts(): string[] {
  return knownServices().filter(
    (short) =>
      !isProtectedSystemdUnit(moduleSystemdUnitName(short)) &&
      !isProtectedLaunchdLabel(moduleLaunchdLabel(short)),
  );
}
115
+
/**
 * systemd `is-enabled` tokens that mean "this unit will autostart" — i.e. the
 * stale-unit problem we're disabling. `disabled` / `masked` / `not-found` (and a
 * nonzero exit with empty stdout) mean it won't, so they're a no-op.
 *
 * `static` and `indirect` units have no [Install] section / are pulled in by
 * another unit; a standalone leftover `parachute-vault.service` written by the
 * old per-module autostart path always carried `[Install] WantedBy=…` so reads
 * `enabled` — but we treat `static`/`indirect` as "present + active intent" too
 * so an oddly-written leftover still gets cleaned. `linked`/`generated` likewise.
 *
 * Typed as `ReadonlySet` so this shared module-level constant can't be mutated.
 */
const SYSTEMD_ENABLED_TOKENS: ReadonlySet<string> = new Set([
  "enabled",
  "enabled-runtime",
  "static",
  "indirect",
  "linked",
  "linked-runtime",
  "generated",
  "alias",
]);
137
+
/** Outcome of one unit's detect-and-disable attempt. */
export interface StaleUnitAction {
  /** The module short the unit belongs to. */
  short: string;
  /**
   * Which service manager + scope the unit lives in: a launchd label, a
   * systemd USER unit, or a systemd SYSTEM unit.
   */
  kind: "launchd" | "systemd-user" | "systemd-system";
  /** The unit/label name acted on. */
  unit: string;
  /**
   * "disabled"    → we disabled it (report it; the operator sees what changed).
   * "warn-system" → a system-level systemd unit we can't disable without sudo;
   *                 we warn with the manual command. Non-fatal.
   * "failed"      → the disable command failed (perms/quirk); we warn + continue.
   */
  result: "disabled" | "warn-system" | "failed";
  /** The exact line(s) the caller should surface (report / warning). */
  messages: string[];
}
156
+
/** Options for `disableStaleModuleUnits`; every field is optional. */
export interface DisableStaleModuleUnitsOpts {
  /** Injectable platform deps (defaults to production). */
  deps?: ManagedUnitDeps;
  /** Sink for human-readable report / warning lines. */
  log?: (line: string) => void;
}
163
+
/** What `disableStaleModuleUnits` reports back to its caller. */
export interface DisableStaleModuleUnitsResult {
  /** Every unit we acted on (disabled / warned / failed). Empty = clean no-op. */
  actions: StaleUnitAction[];
}
168
+
/**
 * Detect + disable any STALE per-module autostart unit on this platform (#522).
 * Idempotent + non-fatal: already-disabled/absent units are silent no-ops, and a
 * failed disable warns + continues. Returns the list of actions taken; the caller
 * surfaces the messages (the cutover threads them through its own `log`).
 *
 * Dispatch mirrors `managed-unit.ts`: darwin → launchctl, linux → systemctl.
 * Other platforms (no per-module unit possible) → empty no-op.
 *
 * @param opts - Optional injectable deps and log sink; defaults to
 *   `defaultManagedUnitDeps` and a no-op logger.
 * @returns Every action recorded, in the order the shorts were visited. Each
 *   action's messages have already been passed to `log` by the time this returns.
 */
export function disableStaleModuleUnits(
  opts: DisableStaleModuleUnitsOpts = {},
): DisableStaleModuleUnitsResult {
  const deps = opts.deps ?? defaultManagedUnitDeps;
  const log = opts.log ?? (() => {});
  const actions: StaleUnitAction[] = [];

  // Collect the action and forward its lines to the caller's sink immediately.
  const record = (action: StaleUnitAction): void => {
    actions.push(action);
    for (const line of action.messages) log(line);
  };

  if (deps.platform === "darwin") {
    // No launchctl on PATH → nothing we could probe or disable.
    if (deps.which("launchctl") === null) return { actions };
    // Falls back to uid 0 when `getuid()` yields null/undefined.
    const uid = deps.getuid() ?? 0;
    for (const short of targetModuleShorts()) {
      const label = moduleLaunchdLabel(short);
      // Belt-and-suspenders: never touch a protected (hub / cloudflared) label.
      if (isProtectedLaunchdLabel(label)) continue;
      const action = disableStaleLaunchdUnit(short, label, uid, deps);
      if (action) record(action);
    }
    return { actions };
  }

  if (deps.platform === "linux") {
    // No systemctl on PATH → nothing we could probe or disable.
    if (deps.which("systemctl") === null) return { actions };
    for (const short of targetModuleShorts()) {
      const unit = moduleSystemdUnitName(short);
      // Belt-and-suspenders: never touch a protected (hub / cloudflared) unit.
      if (isProtectedSystemdUnit(unit)) continue;
      const action = disableStaleSystemdUnit(short, unit, deps);
      if (action) record(action);
    }
    return { actions };
  }

  // No per-platform manager (container / init-less / Windows) → nothing to do.
  return { actions };
}
217
+
/**
 * launchd arm: probe `launchctl print gui/<uid>/<label>`. The label is LOADED
 * (a stale KeepAlive LaunchAgent) when the print succeeds (exit 0) with
 * non-empty output; we then `launchctl bootout` it (unload + stop → KeepAlive
 * can't resurrect it). An unloaded/absent label prints empty/nonzero → clean
 * no-op (returns undefined).
 *
 * @param short - Module short the label belongs to (used in messages).
 * @param label - launchd label to probe and, if loaded, boot out.
 * @param uid - User id used to build the `gui/<uid>/<label>` service target.
 * @param deps - Platform seam; only `deps.run` is used here.
 * @returns `undefined` when there was nothing to disable (or the probe itself
 *   threw), otherwise a "disabled" or "failed" action. Never throws on a
 *   `deps.run` failure.
 */
function disableStaleLaunchdUnit(
  short: string,
  label: string,
  uid: number,
  deps: ManagedUnitDeps,
): StaleUnitAction | undefined {
  const target = `gui/${uid}/${label}`;

  // One builder for both failure paths (thrown error / nonzero exit) so the
  // warning text and the manual command can't drift apart.
  const failed = (detail: string): StaleUnitAction => ({
    short,
    kind: "launchd",
    unit: label,
    result: "failed",
    messages: [
      ` ⚠ Could not disable the stale LaunchAgent ${label} (${detail}).`,
      ` Run it yourself: launchctl bootout ${target}`,
    ],
  });

  let printed: { code: number; stdout: string; stderr: string };
  try {
    printed = deps.run(["launchctl", "print", target]);
  } catch {
    // launchctl threw (ENOENT between which() and run, or a quirk) — non-fatal.
    return undefined;
  }
  // Not loaded → nothing to disable. Require BOTH a zero exit and non-empty
  // stdout: a failing print that still writes a diagnostic to stdout must not
  // be mistaken for a loaded label (that would trigger a spurious bootout and a
  // bogus "failed" warning).
  if (printed.code !== 0 || printed.stdout.trim().length === 0) return undefined;

  let booted: { code: number; stdout: string; stderr: string };
  try {
    booted = deps.run(["launchctl", "bootout", target]);
  } catch (err) {
    return failed(err instanceof Error ? err.message : String(err));
  }
  if (booted.code !== 0) {
    // Prefer stderr, then stdout, so the operator sees launchctl's own reason.
    return failed(booted.stderr.trim() || booted.stdout.trim() || "unknown error");
  }
  return {
    short,
    kind: "launchd",
    unit: label,
    result: "disabled",
    messages: [
      ` ✓ Disabled stale ${label} (it was fighting the supervised hub for ${short}'s port).`,
    ],
  };
}
279
+
/**
 * systemd arm: a stale standalone module unit can live at USER scope (the common
 * pre-supervisor leftover, no sudo to write) or SYSTEM scope (rarer). We probe
 * both:
 *   - USER (`systemctl --user is-enabled <unit>`): if enabled → `--user disable
 *     --now`. This is the path migrate can actually fix.
 *   - SYSTEM (`systemctl is-enabled <unit>`): if enabled but USER wasn't → migrate
 *     has no sudo, so WARN with the exact `sudo systemctl disable --now …` command
 *     (never attempt sudo).
 * An absent/disabled unit at both scopes → clean no-op (returns undefined).
 *
 * @param short - Module short the unit belongs to (used in messages).
 * @param unit - Full systemd unit name to probe.
 * @param deps - Platform seam; `deps.run` is used here and by the enabled probe.
 * @returns `undefined` when nothing is enabled at either scope; otherwise a
 *   "disabled" / "failed" action for the user unit, or a "warn-system" action
 *   for a system unit. The system scope is only probed when the user scope was
 *   not enabled.
 */
function disableStaleSystemdUnit(
  short: string,
  unit: string,
  deps: ManagedUnitDeps,
): StaleUnitAction | undefined {
  // --- USER scope first (what migrate can actually disable). ---
  if (systemdUnitEnabled(unit, ["--user"], deps)) {
    // One builder for both failure paths (thrown error / nonzero exit).
    const failed = (detail: string): StaleUnitAction => ({
      short,
      kind: "systemd-user",
      unit,
      result: "failed",
      messages: [
        ` ⚠ Could not disable the stale user unit ${unit} (${detail}).`,
        ` Run it yourself: systemctl --user disable --now ${unit}`,
      ],
    });

    let res: { code: number; stdout: string; stderr: string };
    try {
      res = deps.run(["systemctl", "--user", "disable", "--now", unit]);
    } catch (err) {
      return failed(err instanceof Error ? err.message : String(err));
    }
    if (res.code !== 0) {
      // Prefer stderr, then stdout, so the operator sees systemctl's own reason.
      return failed(res.stderr.trim() || res.stdout.trim() || "unknown error");
    }
    return {
      short,
      kind: "systemd-user",
      unit,
      result: "disabled",
      messages: [
        ` ✓ Disabled stale ${unit} (it was fighting the supervised hub for ${short}'s port).`,
      ],
    };
  }

  // --- SYSTEM scope: detect-only + warn (no sudo in migrate). ---
  if (systemdUnitEnabled(unit, [], deps)) {
    return {
      short,
      kind: "systemd-system",
      unit,
      result: "warn-system",
      messages: [
        ` ⚠ A SYSTEM-level ${unit} is enabled and may fight the supervised hub for ${short}'s port.`,
        " Migrate can't disable a system unit (it needs root). Disable it yourself:",
        ` sudo systemctl disable --now ${unit}`,
      ],
    };
  }

  return undefined;
}
354
+
/**
 * `systemctl [--user] is-enabled <unit>` → true iff the printed token means the
 * unit will autostart (see `SYSTEMD_ENABLED_TOKENS`). `is-enabled` exits nonzero
 * for non-enabled states and prints the token to stdout regardless of exit, so
 * we classify from the stdout token. A throw (ENOENT/quirk) → treated as
 * not-enabled (non-fatal; the sweep continues).
 *
 * @param unit - Full systemd unit name to query.
 * @param scope - Extra scope flags placed before `is-enabled` (`["--user"]` for
 *   the user manager, `[]` for the system manager).
 * @param deps - Platform seam; only `deps.run` is used here.
 * @returns `true` when the first output line is one of `SYSTEMD_ENABLED_TOKENS`.
 */
function systemdUnitEnabled(unit: string, scope: string[], deps: ManagedUnitDeps): boolean {
  let res: { code: number; stdout: string; stderr: string };
  try {
    res = deps.run(["systemctl", ...scope, "is-enabled", unit]);
  } catch {
    return false;
  }
  // The exit code is deliberately ignored; stderr is only consulted when
  // stdout is empty.
  const output = res.stdout.trim() || res.stderr.trim();
  if (output.length === 0) return false;
  // `is-enabled` can print the token then a hint on a second line; read line 1.
  const first = output.split("\n")[0]?.trim() ?? "";
  return SYSTEMD_ENABLED_TOKENS.has(first);
}