@venturewild/workspace 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/src/daemon-bin.mjs +22 -0
- package/server/src/doctor.mjs +57 -2
- package/server/src/google-vouch.mjs +87 -0
- package/server/src/index.mjs +81 -5
- package/server/src/service.mjs +97 -1
- package/server/src/supervisor.mjs +139 -2
- package/web/dist/assets/{index-BPjgICaB.js → index-DatlFPkm.js} +31 -31
- package/web/dist/index.html +1 -1
package/package.json
CHANGED
|
package/server/src/daemon-bin.mjs
CHANGED
|
@@ -86,3 +86,25 @@ export function resolveDaemonVersion({ env = process.env, requireResolve } = {})
|
|
|
86
86
|
return null;
|
|
87
87
|
}
|
|
88
88
|
}
|
|
89
|
+
|
|
90
|
+
/**
 * Daemon version that the installed meta package pins for the current platform.
 *
 * Looks up `optionalDependencies["@venturewild/workspace-daemon-<platformTag>"]`
 * in the meta `package.json` (by default the one two directories above this
 * file). Comparing the result with resolveDaemonVersion() exposes the case where
 * the meta package was updated but the daemon subpackage on disk was not.
 *
 * @param {object} [options]
 * @param {string} [options.metaPkgPath] Explicit path to the meta package.json.
 * @returns {string|null} The pinned version with one leading `^` or `~`
 *   removed, or null when the file or the entry cannot be read.
 */
export function expectedDaemonVersion({ metaPkgPath } = {}) {
  const depName = `@venturewild/workspace-daemon-${platformTag()}`;
  let pinned;
  try {
    const manifestPath = metaPkgPath || path.resolve(__dirname, '..', '..', 'package.json');
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf8'));
    pinned = manifest?.optionalDependencies?.[depName];
  } catch {
    // Missing or unparseable manifest: report "unknown" rather than failing.
    return null;
  }
  if (typeof pinned !== 'string') return null;
  return pinned.replace(/^[~^]/, '');
}
|
package/server/src/doctor.mjs
CHANGED
|
@@ -17,12 +17,25 @@ import path from 'node:path';
|
|
|
17
17
|
import { buildConfig, APP_VERSION } from './config.mjs';
|
|
18
18
|
import { detectAgents, pickDefaultAgent } from './agent.mjs';
|
|
19
19
|
import { probeAgentReadiness } from './agent-readiness.mjs';
|
|
20
|
-
import { resolveDaemonBinary } from './daemon-bin.mjs';
|
|
20
|
+
import { resolveDaemonBinary, resolveDaemonVersion, expectedDaemonVersion } from './daemon-bin.mjs';
|
|
21
21
|
import { checkPort } from './preview.mjs';
|
|
22
22
|
import { loadAccount } from './account.mjs';
|
|
23
23
|
import { serviceStatus } from './service.mjs';
|
|
24
24
|
import { probeHealth, probeHealthVersion } from './supervisor.mjs';
|
|
25
|
-
import { listLogs, diagnosticsDir } from './logpaths.mjs';
|
|
25
|
+
import { listLogs, diagnosticsDir, globalDir } from './logpaths.mjs';
|
|
26
|
+
|
|
27
|
+
/**
 * Version the currently running daemon was spawned under.
 *
 * Reads the `daemonVersion` field of `daemon-runtime.json` inside
 * `globalDir(env)` — the marker file the supervisor keeps, since the daemon's
 * own /health reports no version.
 *
 * @param {object} [env=process.env] Environment used to locate the global dir.
 * @returns {string|null} The recorded version, or null when the marker is
 *   missing, unparseable, or carries no string `daemonVersion`.
 */
function readRunningDaemonVersion(env = process.env) {
  let recorded;
  try {
    const markerPath = path.join(globalDir(env), 'daemon-runtime.json');
    const marker = JSON.parse(fs.readFileSync(markerPath, 'utf8'));
    recorded = marker?.daemonVersion;
  } catch {
    // Never started / no marker / corrupt marker all mean "unknown".
    return null;
  }
  if (typeof recorded === 'string') return recorded;
  return null;
}
|
|
26
39
|
|
|
27
40
|
const STATUS_ICON = { ok: '✅', warn: '⚠️', fail: '❌', info: 'ℹ️' };
|
|
28
41
|
|
|
@@ -93,6 +106,9 @@ export async function runDoctor(opts = {}, deps = {}) {
|
|
|
93
106
|
listLogs: deps.listLogs || listLogs,
|
|
94
107
|
fetchImpl: deps.fetchImpl || ((...a) => globalThis.fetch(...a)),
|
|
95
108
|
probeRunningVersion: deps.probeRunningVersion || probeHealthVersion,
|
|
109
|
+
daemonInstalledVersion: deps.daemonInstalledVersion || (() => resolveDaemonVersion({ env })),
|
|
110
|
+
daemonExpectedVersion: deps.daemonExpectedVersion || (() => expectedDaemonVersion()),
|
|
111
|
+
daemonRunningVersion: deps.daemonRunningVersion || (() => readRunningDaemonVersion(env)),
|
|
96
112
|
};
|
|
97
113
|
const checks = [];
|
|
98
114
|
const add = (c) => checks.push(c);
|
|
@@ -169,6 +185,45 @@ export async function runDoctor(opts = {}, deps = {}) {
|
|
|
169
185
|
return { status: 'ok', detail: `${r.path} (${r.source})`, hint: null };
|
|
170
186
|
});
|
|
171
187
|
|
|
188
|
+
// 4b. Daemon version drift (the go-live stale-process finding, Part 8). Three
|
|
189
|
+
// versions should agree: what the meta package PINS (expected), what's actually
|
|
190
|
+
// on disk (installed subpackage), and what the live daemon was spawned under
|
|
191
|
+
// (running marker). A mismatch is the exact "support channel silently 504s after
|
|
192
|
+
// an update" chain — the meta package updated but the daemon binary on disk
|
|
193
|
+
// lagged, or the daemon kept running old code. Surfaced so the fix (reinstall /
|
|
194
|
+
// restart) is obvious instead of invisible.
|
|
195
|
+
await guarded('daemonVersion', 'Sync daemon version', async () => {
|
|
196
|
+
const expected = d.daemonExpectedVersion();
|
|
197
|
+
const installed = d.daemonInstalledVersion();
|
|
198
|
+
const running = d.daemonRunningVersion();
|
|
199
|
+
const bits = [`pinned=${expected || '?'}`, `installed=${installed || 'PATH/vendor'}`, `running=${running || 'not started'}`];
|
|
200
|
+
const detail = bits.join(' ');
|
|
201
|
+
// Meta pins a version but the on-disk daemon subpackage is a DIFFERENT one →
|
|
202
|
+
// `npm i -g` didn't refresh the optionalDependency (the Windows dev box lag).
|
|
203
|
+
if (expected && installed && expected !== installed) {
|
|
204
|
+
return {
|
|
205
|
+
status: 'warn',
|
|
206
|
+
detail,
|
|
207
|
+
hint: `The daemon on disk (${installed}) does not match what this version pins (${expected}). Reinstall to refresh it: npm i -g @venturewild/workspace@latest`,
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
// The live daemon is running an older binary than what's installed → it needs
|
|
211
|
+
// a recycle (the always-on supervisor does this on its next tick).
|
|
212
|
+
if (installed && running && installed !== running) {
|
|
213
|
+
return {
|
|
214
|
+
status: 'warn',
|
|
215
|
+
detail,
|
|
216
|
+
hint: `The running daemon (${running}) is older than installed (${installed}). Always-on recycles it automatically; or restart sync (\`wild-workspace daemon stop\` then \`wild-workspace\`).`,
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
if (!installed) {
|
|
220
|
+
// PATH/vendor resolution — can't compare versions; the daemonBinary check
|
|
221
|
+
// above already warns about the missing bundled binary.
|
|
222
|
+
return { status: 'info', detail, hint: null };
|
|
223
|
+
}
|
|
224
|
+
return { status: 'ok', detail, hint: null };
|
|
225
|
+
});
|
|
226
|
+
|
|
172
227
|
// 5. Workspace port
|
|
173
228
|
await guarded('port', `Workspace port :${config.port}`, async () => {
|
|
174
229
|
const inUse = await d.checkPort(config.port);
|
|
package/server/src/google-vouch.mjs
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// Google sign-in vouch (req 2) — the LOCAL-server half of "Sign in with Google".
|
|
2
|
+
//
|
|
3
|
+
// Why a vouch at all: Google's OAuth redirect URI must be a single, pre-registered
|
|
4
|
+
// HTTPS URL, and the code exchange needs the client SECRET — neither can live on a
|
|
5
|
+
// per-user, behind-the-tunnel local server. So bmo-sync (the central relay, which
|
|
6
|
+
// already holds the account registry) owns the Google flow: it exchanges the code,
|
|
7
|
+
// reads the user's verified email, checks it against the workspace's OWNER email,
|
|
8
|
+
// and — only on a match — mints a short-lived VOUCH that it hands back to the local
|
|
9
|
+
// server via `<slug>.venturewild.llc/?gv=<vouch>`. The local server verifies the
|
|
10
|
+
// vouch and mints its own durable device token (reusing the same machinery as a
|
|
11
|
+
// device-approval), so the cookie/role/revocation story is unchanged.
|
|
12
|
+
//
|
|
13
|
+
// The trust anchor is a secret BOTH sides already share, with NO new key to
|
|
14
|
+
// distribute: the account token. bmo-sync stores `sha256(account_token)` in hex
|
|
15
|
+
// (accounts.account_token_hash); the local server has the raw account token and
|
|
16
|
+
// derives the same hex. That hex is the HS256 signing key. A third party can't
|
|
17
|
+
// forge a vouch without the account token (or bmo-sync's at-rest hash), and the
|
|
18
|
+
// local server already trusts bmo-sync (it routes all its traffic), so vouching
|
|
19
|
+
// for identity adds no party that wasn't already in the trust base.
|
|
20
|
+
|
|
21
|
+
import { SignJWT, jwtVerify } from 'jose';
|
|
22
|
+
import { createHash } from 'node:crypto';
|
|
23
|
+
|
|
24
|
+
export const VOUCH_AUDIENCE = 'wild-workspace-google-vouch';
|
|
25
|
+
export const VOUCH_ISSUER = 'bmo-sync';
|
|
26
|
+
// Vouches are single-hop and consumed immediately on the return redirect, so the
|
|
27
|
+
// lifetime is tiny — long enough to survive the 302 + the SPA's POST, no more.
|
|
28
|
+
export const VOUCH_TTL_SECONDS = 120;
|
|
29
|
+
|
|
30
|
+
/**
 * Derive the HS256 key shared with bmo-sync from the account token.
 *
 * The key material is the lowercase-hex SHA-256 digest of the token, encoded
 * as UTF-8 bytes (i.e. the 64 ASCII hex characters, not the raw 32 digest
 * bytes) — the same hex string bmo-sync stores as `account_token_hash`.
 *
 * @param {*} accountToken Raw account token; coerced with String().
 * @returns {Uint8Array|null} Key bytes, or null when the token is falsy
 *   (install not slug-linked, so no Google sign-in).
 */
export function vouchKey(accountToken) {
  if (accountToken) {
    const digestHex = createHash('sha256').update(String(accountToken)).digest('hex');
    return new TextEncoder().encode(digestHex);
  }
  return null;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Mint a Google vouch (HS256 JWT).
 *
 * bmo-sync implements this contract in Rust; the JS minter exists so the
 * local-server verifier can be tested round-trip and the claim shape has a
 * single source of truth. It is not used by the server in production.
 *
 * @param {object} args
 * @param {*} args.accountToken Account token the signing key is derived from.
 * @param {*} args.email Email claim; coerced with String().
 * @param {*} [args.slug] Workspace slug claim; omitted when falsy.
 * @param {number} [args.nowSec] Issued-at time in epoch seconds (default: now).
 * @param {number} [args.ttlSeconds] Lifetime in seconds (default VOUCH_TTL_SECONDS).
 * @returns {Promise<string>} The signed compact JWT.
 * @throws {Error} 'no account token' when no key can be derived.
 */
export async function mintGoogleVouch({ accountToken, email, slug, nowSec = Math.floor(Date.now() / 1000), ttlSeconds = VOUCH_TTL_SECONDS }) {
  const signingKey = vouchKey(accountToken);
  if (!signingKey) throw new Error('no account token');

  const claims = { email: String(email), slug: slug ? String(slug) : undefined };
  const expiresAt = nowSec + ttlSeconds;

  const builder = new SignJWT(claims);
  builder.setProtectedHeader({ alg: 'HS256', typ: 'JWT' });
  builder.setIssuer(VOUCH_ISSUER);
  builder.setAudience(VOUCH_AUDIENCE);
  builder.setIssuedAt(nowSec);
  builder.setExpirationTime(expiresAt);
  return builder.sign(signingKey);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Verify a Google vouch against this install's account token. Never throws.
 *
 * Checks the signature with the key derived from `accountToken`, plus issuer,
 * audience and expiry. The algorithm is pinned to HS256 (the only one the
 * minter uses) and `exp` is required: jose validates `exp` only when the claim
 * is present, so without this a correctly signed vouch that omitted `exp`
 * would never expire.
 *
 * The caller must still check that `email` is the configured owner email and
 * that `slug` is this install — a valid vouch only proves "bmo-sync verified
 * this Google identity for this account", not "this is the owner".
 *
 * @param {string} token Compact JWT handed back by bmo-sync.
 * @param {*} accountToken Raw account token of this install.
 * @returns {Promise<{ok: true, email: string, slug: string|null} | {ok: false, reason: string}>}
 *   `reason` is 'no-token-or-key', 'no-email', the jose error code, or 'invalid'.
 */
export async function verifyGoogleVouch(token, accountToken) {
  const key = vouchKey(accountToken);
  if (!token || !key) return { ok: false, reason: 'no-token-or-key' };
  try {
    const { payload } = await jwtVerify(token, key, {
      issuer: VOUCH_ISSUER,
      audience: VOUCH_AUDIENCE,
      algorithms: ['HS256'],
      requiredClaims: ['exp'],
    });
    if (!payload.email || typeof payload.email !== 'string') {
      return { ok: false, reason: 'no-email' };
    }
    return { ok: true, email: payload.email, slug: typeof payload.slug === 'string' ? payload.slug : null };
  } catch (e) {
    // jose throws on bad signature / expiry / aud-iss mismatch / missing
    // required claim / disallowed algorithm — all of them mean "not valid".
    return { ok: false, reason: e?.code || 'invalid' };
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Case-insensitive owner-email match (Google emails are case-insensitive).
 *
 * Both values are trimmed and lower-cased before comparison. Because this
 * gates an identity decision it is deliberately strict: non-string inputs and
 * values that are empty after trimming never match (previously two
 * whitespace-only strings, or two coerced non-strings, compared equal).
 *
 * @param {*} a First email.
 * @param {*} b Second email.
 * @returns {boolean} True only when both are non-blank strings that are equal
 *   after trimming and lower-casing.
 */
export function emailMatches(a, b) {
  if (typeof a !== 'string' || typeof b !== 'string') return false;
  const left = a.trim().toLowerCase();
  const right = b.trim().toLowerCase();
  return left !== '' && left === right;
}
|
package/server/src/index.mjs
CHANGED
|
@@ -30,6 +30,7 @@ import {
|
|
|
30
30
|
TokenRegistry,
|
|
31
31
|
} from './share.mjs';
|
|
32
32
|
import { PairingStore } from './pairing.mjs';
|
|
33
|
+
import { verifyGoogleVouch, emailMatches } from './google-vouch.mjs';
|
|
33
34
|
import { listDir, readFile, fullTree, workspaceSummary, safeResolve } from './fs.mjs';
|
|
34
35
|
import { InboxWatcher } from './inbox.mjs';
|
|
35
36
|
import { ActivityBus } from './activity.mjs';
|
|
@@ -588,6 +589,14 @@ export async function createServer(overrides = {}) {
|
|
|
588
589
|
return attrs.join('; ');
|
|
589
590
|
}
|
|
590
591
|
|
|
592
|
+
// The host-owner loopback grant (req 1) is only sound when the server is BOUND
|
|
593
|
+
// to loopback — then the port is reachable solely by on-host processes, so the
|
|
594
|
+
// (client-controlled) Host header can't be spoofed from the network. On a
|
|
595
|
+
// 0.0.0.0 / public bind we must NOT trust loopback-looking headers; tokens only.
|
|
596
|
+
const isLocalBind = ['127.0.0.1', 'localhost', '::1', '[::1]'].includes(
|
|
597
|
+
String(config.host || '127.0.0.1').toLowerCase(),
|
|
598
|
+
);
|
|
599
|
+
|
|
591
600
|
// --- auth + role resolution ---
|
|
592
601
|
async function resolveRole(c) {
|
|
593
602
|
// 1. Authorization: Bearer — the only path that may carry the operator token.
|
|
@@ -619,7 +628,16 @@ export async function createServer(overrides = {}) {
|
|
|
619
628
|
if (!config.publicMode) {
|
|
620
629
|
return { role: ROLES.PARTNER, sub: 'local-partner', source: 'localhost' };
|
|
621
630
|
}
|
|
622
|
-
//
|
|
631
|
+
// Host-machine owner over GENUINE loopback (req 1: "same machine = no
|
|
632
|
+
// approval"). The owner opens localhost on the box that runs the workspace and
|
|
633
|
+
// is in — no device-approval. A tunneled visitor can never reach this: the
|
|
634
|
+
// relay stamps x-forwarded-host on every tunneled request (rejected by
|
|
635
|
+
// loopbackHeaders) and the server binds loopback. The only thing trusted here
|
|
636
|
+
// is an on-host process, which already holds the owner's files + secrets.
|
|
637
|
+
if (isLocalBind && isGenuineLoopback(c)) {
|
|
638
|
+
return { role: ROLES.PARTNER, sub: 'local-owner', source: 'loopback' };
|
|
639
|
+
}
|
|
640
|
+
// Public mode, not local, no valid token: deny. No anonymous viewer access —
|
|
623
641
|
// a share JWT or the partner token is required. (Concern C1.)
|
|
624
642
|
return { role: null, sub: 'anon', source: 'unauth', denied: true };
|
|
625
643
|
}
|
|
@@ -692,6 +710,10 @@ export async function createServer(overrides = {}) {
|
|
|
692
710
|
// request can reach it; the handler itself 404s anything that isn't real
|
|
693
711
|
// loopback, so a public visitor can never use it.
|
|
694
712
|
'/api/auth/bootstrap',
|
|
713
|
+
// Google sign-in return (req 2). A browser coming back from Google has no
|
|
714
|
+
// workspace token yet — it carries a bmo-sync vouch, which the handler
|
|
715
|
+
// verifies (an invalid vouch is rejected there), so being public is safe.
|
|
716
|
+
'/api/auth/google',
|
|
695
717
|
]);
|
|
696
718
|
app.use('*', async (c, next) => {
|
|
697
719
|
const session = await resolveRole(c);
|
|
@@ -760,14 +782,22 @@ export async function createServer(overrides = {}) {
|
|
|
760
782
|
// with no forwarding headers + a loopback Host. (B1 — basis for first-device
|
|
761
783
|
// bootstrap. A local process already has the user's filesystem + secrets, so
|
|
762
784
|
// trusting it grants nothing it didn't already have.)
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
785
|
+
// Shared loopback predicate over a header getter — reused by the Hono path
|
|
786
|
+
// (isGenuineLoopback) and the raw-Node WS upgrade path (req.headers). A GENUINE
|
|
787
|
+
// local request carries NONE of the relay's forwarding headers (bmo-sync strips
|
|
788
|
+
// any client-supplied X-Forwarded-*/X-Real-IP and stamps its own on every
|
|
789
|
+
// tunneled request, so they can't be faked from the internet) and has a loopback
|
|
790
|
+
// Host. The server binds loopback, so only an on-host process can produce this.
|
|
791
|
+
/**
 * Loopback predicate over a header getter, shared by the Hono path and the
 * raw-Node WebSocket upgrade path.
 *
 * A request counts as loopback when it carries none of the forwarding headers
 * (x-forwarded-for / -host / -proto, x-real-ip) and its Host header names a
 * loopback host, with or without a port.
 *
 * IPv6 literals are parsed explicitly: splitting `[::1]:3000` (or `::1`) on the
 * first ':' yields '[' (or ''), which made the IPv6 comparisons below
 * unreachable. A bracketed literal is accepted only when nothing but an
 * optional `:port` follows the closing bracket.
 *
 * @param {(name: string) => (string|undefined)} get Returns a header value by
 *   lower-case name.
 * @returns {boolean} True when the headers look like a direct loopback request.
 */
function loopbackHeaders(get) {
  if (get('x-forwarded-for') || get('x-forwarded-host') || get('x-forwarded-proto') || get('x-real-ip')) {
    return false;
  }
  const host = String(get('host') || '').toLowerCase();
  let hostname;
  if (host.startsWith('[')) {
    // Bracketed IPv6 literal: keep "[...]" and allow only ":port" after it.
    const close = host.indexOf(']');
    const rest = close === -1 ? null : host.slice(close + 1);
    hostname = rest !== null && /^(:\d*)?$/.test(rest) ? host.slice(0, close + 1) : '';
  } else if (host === '::1') {
    // Bare IPv6 loopback without brackets (no port can be expressed this way).
    hostname = host;
  } else {
    hostname = host.split(':')[0];
  }
  return hostname === '127.0.0.1' || hostname === 'localhost' || hostname === '::1' || hostname === '[::1]';
}
|
|
798
|
+
/**
 * Hono-context adapter for loopbackHeaders: true when the request behind `c`
 * has loopback-looking headers.
 *
 * @param {object} c Hono context; headers are read through `c.req.header(name)`.
 * @returns {boolean} Result of loopbackHeaders for this request.
 */
function isGenuineLoopback(c) {
  const readHeader = (name) => c.req.header(name);
  return loopbackHeaders(readHeader);
}
|
|
771
801
|
|
|
772
802
|
// --- auth: first-device bootstrap (B1) -------------------------------------
|
|
773
803
|
// The everyday problem: a brand-new owner runs `wild-workspace` and wants to
|
|
@@ -862,6 +892,47 @@ export async function createServer(overrides = {}) {
|
|
|
862
892
|
return c.json({ ok: true, role: session.role, cookie: true });
|
|
863
893
|
});
|
|
864
894
|
|
|
895
|
+
// Google sign-in return (req 2). bmo-sync ran the Google OAuth centrally, checked
|
|
896
|
+
// the verified email against THIS account's owner, and handed the browser back a
|
|
897
|
+
// short-lived vouch via <slug>.venturewild.llc/?gv=<vouch>. The SPA POSTs it here;
|
|
898
|
+
// we verify it against the shared account secret, re-confirm the owner email +
|
|
899
|
+
// slug, then mint the SAME durable, individually-revocable device token a device
|
|
900
|
+
// approval would — so any device the owner signs into with Google is in, no
|
|
901
|
+
// second-device approval and no nag. Public-allowlisted: the vouch IS the
|
|
902
|
+
// credential, so an invalid one is rejected here (401/403), never trusted.
|
|
903
|
+
// POST /api/auth/google — exchange a bmo-sync Google vouch for a durable
// device token delivered as the auth cookie.
//
// Responses: 400 not-linked (no account token), 401 invalid-vouch,
// 403 not-owner / wrong-workspace, 200 { ok, role, cookie } on success.
app.post('/api/auth/google', async (c) => {
  // accountToken is kept top-level on config (out of the broadcast config.account).
  if (!config.accountToken) {
    return c.json({ error: 'not-linked' }, 400); // not slug-linked → no Google sign-in
  }
  // A missing or unparseable body is fine (the vouch may arrive as ?gv=). A
  // body that parses to a non-object such as `null` is handled the same way
  // rather than crashing on property access — this route is public.
  let body = null;
  try { body = await c.req.json(); } catch { /* empty body ok */ }
  const vouch = typeof body?.vouch === 'string' ? body.vouch : c.req.query('gv');
  const v = await verifyGoogleVouch(vouch, config.accountToken);
  if (!v.ok) {
    log('[auth]', `google vouch rejected: ${v.reason}`);
    return c.json({ error: 'invalid-vouch' }, 401);
  }
  // The vouch proves "bmo-sync verified this Google identity for this account".
  // Re-enforce that it's the OWNER of THIS install (email + slug) so a vouch
  // minted for another workspace can't be replayed here. A missing account
  // record simply fails the owner check.
  const account = config.account || {};
  if (!emailMatches(v.email, account.email)) {
    log('[auth]', `google vouch email mismatch (${v.email})`);
    return c.json({ error: 'not-owner' }, 403);
  }
  if (v.slug && account.slug && v.slug !== account.slug) {
    return c.json({ error: 'wrong-workspace' }, 403);
  }
  // Same durable, individually-revocable device token a device approval mints.
  const device = await mintDeviceToken({ secret: config.shareSecret, workspaceId: config.workspaceId });
  tokenRegistry.add({ ...device, kind: 'device', label: `Google (${v.email})`, createdAt: Date.now() });
  const now = Math.floor(Date.now() / 1000);
  // Cookie lifetime follows the token expiry, with a 60-second floor.
  const maxAge = Math.max(60, device.exp - now);
  c.header('Set-Cookie', authCookieAttrs(device.token, maxAge));
  auditAction(c, 'google-signin', `email=${v.email} sub=${device.sub}`);
  log('[auth]', `google sign-in → durable device sub=${device.sub} email=${v.email} ttl=${maxAge}s`);
  return c.json({ ok: true, role: 'partner', cookie: true });
});
|
|
935
|
+
|
|
865
936
|
app.post('/api/auth/logout', (c) => {
|
|
866
937
|
c.header('Set-Cookie', authCookieAttrs('', 0));
|
|
867
938
|
return c.json({ ok: true });
|
|
@@ -2078,6 +2149,11 @@ export async function createServer(overrides = {}) {
|
|
|
2078
2149
|
} else if (!cookieToken && !tokenFromQuery && !config.publicMode) {
|
|
2079
2150
|
role = ROLES.PARTNER;
|
|
2080
2151
|
sub = 'local-partner';
|
|
2152
|
+
} else if (!cookieToken && !tokenFromQuery && isLocalBind && loopbackHeaders((n) => req.headers[n])) {
|
|
2153
|
+
// Host-machine owner over genuine loopback (req 1) — mirror resolveRole so
|
|
2154
|
+
// the chat WS works for the local owner with no token, same as the SPA.
|
|
2155
|
+
role = ROLES.PARTNER;
|
|
2156
|
+
sub = 'local-owner';
|
|
2081
2157
|
}
|
|
2082
2158
|
// Deny: public mode with no token, or any invalid/revoked token. An
|
|
2083
2159
|
// invalid token must NOT silently fall back to partner. (Concern C1.)
|
package/server/src/service.mjs
CHANGED
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
// synced workspace (locked principle #1). Every external touch-point (reg.exe,
|
|
25
25
|
// launchctl, kill) is an injected seam for testability.
|
|
26
26
|
|
|
27
|
-
import { execFile } from 'node:child_process';
|
|
27
|
+
import { execFile, spawn } from 'node:child_process';
|
|
28
28
|
import { promisify } from 'node:util';
|
|
29
29
|
import fs from 'node:fs';
|
|
30
30
|
import os from 'node:os';
|
|
@@ -327,6 +327,102 @@ async function linuxStatus({ dir, systemdUserDir, execFileImpl, probeImpl, port
|
|
|
327
327
|
return { installed, runValue: installed ? unit : null, supervisorPid, supervisorAlive, serverUp, enabled, active };
|
|
328
328
|
}
|
|
329
329
|
|
|
330
|
+
// --- self-restart: re-exec the supervisor to load freshly-installed code -----
|
|
331
|
+
//
|
|
332
|
+
// After an auto-update installs new code, the long-lived SUPERVISOR keeps running
|
|
333
|
+
// the OLD code until it restarts — RC1b restarts the server CHILD, never the
|
|
334
|
+
// supervisor parent. That's the go-live "stale-process-after-update chain"
|
|
335
|
+
// (remote-support-and-self-healing-design.md Part 8): the supervisor's daemon-
|
|
336
|
+
// drift recycle logic can't run, so the daemon stays on the old binary and the
|
|
337
|
+
// support channel silently 504s. restartSelf() restarts the supervisor itself,
|
|
338
|
+
// per-OS, so the whole stack lands new code with NO reboot:
|
|
339
|
+
// - macOS: launchctl kickstart -k gui/<uid>/<label> (launchd kills + relaunches us)
|
|
340
|
+
// - Linux: systemctl --user restart <unit> (only when systemd-managed)
|
|
341
|
+
// - Windows: re-spawn the hidden VBS launcher; the caller then exits so the
|
|
342
|
+
// successor takes the singleton lock (no service manager to do it).
|
|
343
|
+
//
|
|
344
|
+
// SAFE BY CONSTRUCTION — never kill the only supervisor on a non-managed run:
|
|
345
|
+
// - mac: kickstart errors when the job isn't loaded (manual `service run`) →
|
|
346
|
+
// reported not-restarted, supervisor keeps running (old code, same as
|
|
347
|
+
// before this feature) rather than dying.
|
|
348
|
+
// - linux: gated on INVOCATION_ID (systemd sets it for its own services); a
|
|
349
|
+
// manual run has none → no-op (a `restart` would otherwise spawn a
|
|
350
|
+
// SECOND supervisor that collides on the singleton lock).
|
|
351
|
+
// - win: only re-spawns when the installed launcher exists.
|
|
352
|
+
// On mac/Linux the service manager kills+sequences the restart (no lock race). On
|
|
353
|
+
// Windows the caller exits AFTER we've spawned the successor; the successor's node
|
|
354
|
+
// boot (~hundreds of ms) outlasts the caller's lock release, so it takes over
|
|
355
|
+
// cleanly — and a lost race merely falls back to the next-login launch (no user
|
|
356
|
+
// downtime: the server + daemon are independent processes that keep serving).
|
|
357
|
+
|
|
358
|
+
/**
 * macOS self-restart: ask launchd to kill and relaunch the supervisor job via
 * `launchctl kickstart -k gui/<uid>/<label>`.
 *
 * @param {object} args
 * @param {Function} args.execFileImpl Promise-returning execFile(cmd, argv).
 * @param {number|string} args.uid User id used in the launchd service target.
 * @param {string} args.label launchd job label.
 * @returns {Promise<object>} `{ restarted, method, target }`, plus `error`
 *   (first line of the failure message) when launchctl fails. Never throws.
 */
async function macRestartSelf({ execFileImpl, uid, label }) {
  const method = 'launchctl-kickstart';
  const target = `gui/${uid}/${label}`;
  try {
    await execFileImpl('launchctl', ['kickstart', '-k', target]);
  } catch (err) {
    const [firstLine] = String(err?.message || err).split('\n');
    return { restarted: false, method, target, error: firstLine };
  }
  return { restarted: true, method, target };
}
|
|
367
|
+
|
|
368
|
+
/**
 * Linux self-restart: `systemctl --user restart <unit>`, but only when this
 * process has INVOCATION_ID in its environment (i.e. it was started by
 * systemd). Without it nothing is executed and the result carries
 * `reason: 'not-systemd-managed'`.
 *
 * @param {object} args
 * @param {Function} args.execFileImpl Promise-returning execFile(cmd, argv).
 * @param {object} args.env Environment to inspect for INVOCATION_ID.
 * @param {string} args.unit systemd user unit name.
 * @returns {Promise<object>} `{ restarted, method, unit }`, plus `reason` or
 *   `error` (first line of the failure message). Never throws.
 */
async function linuxRestartSelf({ execFileImpl, env, unit }) {
  const method = 'systemctl';
  const managedBySystemd = Boolean(env.INVOCATION_ID);
  if (!managedBySystemd) {
    return { restarted: false, method, unit, reason: 'not-systemd-managed' };
  }
  try {
    await execFileImpl('systemctl', ['--user', 'restart', unit]);
  } catch (err) {
    const [firstLine] = String(err?.message || err).split('\n');
    return { restarted: false, method, unit, error: firstLine };
  }
  return { restarted: true, method, unit };
}
|
|
379
|
+
|
|
380
|
+
/**
 * Windows self-restart: re-spawn the hidden VBS launcher
 * (`<dir>/launch-hidden.vbs`) through wscript.exe, detached.
 *
 * On success the result carries `willExit: true`: the caller must
 * process.exit() so the successor can take the singleton lock. Because the
 * caller exits on that flag, success is only reported when the spawn actually
 * produced a process. Node reports most spawn failures (e.g. ENOENT)
 * asynchronously through an 'error' event and leaves `pid` undefined, rather
 * than throwing; both are handled here so a failed spawn neither crashes this
 * process nor tells the caller to exit with no successor.
 *
 * @param {object} args
 * @param {string} args.dir Directory that contains the launcher script.
 * @param {Function} args.spawnImpl child_process.spawn-compatible function.
 * @returns {object} `{ restarted, method, ... }` with `reason` or `error` on
 *   failure, `launcher` + `willExit` on success. Never throws.
 */
function winRestartSelf({ dir, spawnImpl }) {
  const vbs = path.join(dir, 'launch-hidden.vbs');
  if (!fs.existsSync(vbs)) {
    return { restarted: false, method: 'win-relaunch', reason: 'launcher-absent' };
  }
  try {
    const child = spawnImpl('wscript.exe', [vbs], { detached: true, windowsHide: true, stdio: 'ignore' });
    // An 'error' event with no listener is an uncaught exception; the outcome
    // is decided from `pid` below, so the listener only has to exist.
    child?.on?.('error', () => {});
    // Only objects that expose `pid` are checked, so stand-ins that omit it
    // keep being treated as a successful spawn.
    if (child !== null && typeof child === 'object' && 'pid' in child && child.pid == null) {
      return { restarted: false, method: 'win-relaunch', launcher: vbs, error: 'spawn-failed' };
    }
    child?.unref?.();
    // willExit: the caller MUST process.exit() so the successor can take the lock.
    return { restarted: true, method: 'win-relaunch', launcher: vbs, willExit: true };
  } catch (e) {
    return { restarted: false, method: 'win-relaunch', launcher: vbs, error: String(e?.message || e).split('\n')[0] };
  }
}
|
|
394
|
+
|
|
395
|
+
/**
 * Restart the always-on supervisor process so freshly installed supervisor
 * code loads. Dispatches on platform:
 *   - darwin → macRestartSelf (launchctl kickstart)
 *   - linux  → linuxRestartSelf (systemctl --user restart)
 *   - win32  → winRestartSelf (re-spawn the hidden launcher)
 * Any other platform yields `{ restarted: false, supported: false, platform }`.
 *
 * A result with `willExit: true` (Windows) tells the caller to process.exit()
 * after this returns so the just-spawned successor can take the singleton lock.
 *
 * @param {object} [opts] Operational options; `opts.dir` is where the Windows
 *   launcher lives.
 * @param {object} [deps] Injectable seams: platform, dir, execFileImpl, uid,
 *   label, env, unit, spawnImpl.
 * @returns {Promise<object>} `{ restarted, method, ... }` from the per-OS helper.
 */
export async function restartSelf(opts = {}, deps = {}) {
  const platform = deps.platform || process.platform;
  // dir may come from the operational opts (the supervisor passes its
  // configured globalDir) or from the test deps.
  const dir = opts.dir || deps.dir || globalDir();

  switch (platform) {
    case 'darwin': {
      const execFileImpl = deps.execFileImpl || execFileP;
      const uid = deps.uid ?? currentUid();
      const label = deps.label || LAUNCHD_LABEL;
      return macRestartSelf({ execFileImpl, uid, label });
    }
    case 'linux': {
      const execFileImpl = deps.execFileImpl || execFileP;
      const env = deps.env || process.env;
      const unit = deps.unit || SYSTEMD_UNIT;
      return linuxRestartSelf({ execFileImpl, env, unit });
    }
    case 'win32': {
      const spawnImpl = deps.spawnImpl || spawn;
      return winRestartSelf({ dir, spawnImpl });
    }
    default:
      return { restarted: false, supported: false, platform };
  }
}
|
|
425
|
+
|
|
330
426
|
// --- public API (platform dispatch) ----------------------------------------
|
|
331
427
|
|
|
332
428
|
const unsupported = (platform, key) => ({
|
|
package/server/src/supervisor.mjs
CHANGED
|
@@ -26,6 +26,7 @@ import os from 'node:os';
|
|
|
26
26
|
import path from 'node:path';
|
|
27
27
|
import { fileURLToPath } from 'node:url';
|
|
28
28
|
import { resolveDaemonVersion } from './daemon-bin.mjs';
|
|
29
|
+
import { restartSelf } from './service.mjs';
|
|
29
30
|
|
|
30
31
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
31
32
|
const DEFAULT_SERVER_ENTRY = path.join(__dirname, 'index.mjs');
|
|
@@ -83,6 +84,13 @@ export function installedVersion(entry = DEFAULT_SERVER_ENTRY) {
|
|
|
83
84
|
}
|
|
84
85
|
}
|
|
85
86
|
|
|
87
|
+
// Captured ONCE at module load = the version of the code THIS supervisor process
|
|
88
|
+
// is running. A fresh installedVersion() reads disk, which moves ahead after an
|
|
89
|
+
// in-place `npm i -g`; the difference is the supervisor's OWN staleness (the
|
|
90
|
+
// Part-8 gap). Distinct from APP_VERSION only in that we read the same file the
|
|
91
|
+
// drift check reads, so they're guaranteed equal at startup (no false drift).
|
|
92
|
+
export const SUPERVISOR_VERSION = installedVersion();
|
|
93
|
+
|
|
86
94
|
export class WorkspaceSupervisor {
|
|
87
95
|
constructor({
|
|
88
96
|
serverEntry = DEFAULT_SERVER_ENTRY,
|
|
@@ -134,6 +142,26 @@ export class WorkspaceSupervisor {
|
|
|
134
142
|
// under (tracked in `daemon-runtime.json`, since the daemon's /health reports
|
|
135
143
|
// no version). Test seam: inject a version function.
|
|
136
144
|
daemonVersionImpl = () => resolveDaemonVersion({ env }),
|
|
145
|
+
// Supervisor self-restart after auto-update (the Part-8 stale-process fix):
|
|
146
|
+
// once an update installs new code and the server child restarts + verifies
|
|
147
|
+
// healthy, the supervisor must restart ITSELF so its own new code (e.g. the
|
|
148
|
+
// daemon-drift recycle) loads — RC1b only restarts the child. Per-OS re-exec
|
|
149
|
+
// lives in service.mjs::restartSelf. On by default; kill switch
|
|
150
|
+
// WILD_WORKSPACE_NO_SELF_RESTART=1. A cooldown + a once-per-process guard
|
|
151
|
+
// prevent any restart loop; the delay lets the triggering update tick unwind
|
|
152
|
+
// and logs flush first. All seams injected (no real exit/spawn in tests).
|
|
153
|
+
selfRestart = env.WILD_WORKSPACE_NO_SELF_RESTART !== '1',
|
|
154
|
+
selfRestartCooldownMs = 10 * 60 * 1000,
|
|
155
|
+
selfRestartDelayMs = 3000,
|
|
156
|
+
restartSelfImpl = restartSelf,
|
|
157
|
+
exitImpl = (code = 0) => process.exit(code),
|
|
158
|
+
scheduleImpl = (fn, ms) => { const t = setTimeout(fn, ms); if (t.unref) t.unref(); return t; },
|
|
159
|
+
// The version THIS supervisor process is running (captured at module load).
|
|
160
|
+
// The self-drift backstop self-restarts when the installed-on-disk version
|
|
161
|
+
// moves ahead of this — covering EVERY update path (our auto-updater, the
|
|
162
|
+
// operator `update-now`, the CLI `update apply`, a manual `npm i -g`), not
|
|
163
|
+
// just our own. null disables the backstop (tests default to null).
|
|
164
|
+
selfVersion = SUPERVISOR_VERSION,
|
|
137
165
|
} = {}) {
|
|
138
166
|
Object.assign(this, {
|
|
139
167
|
serverEntry, workspaceDir, port, globalDir, node, pollMs,
|
|
@@ -142,6 +170,8 @@ export class WorkspaceSupervisor {
|
|
|
142
170
|
autoRestartOnVersionDrift, versionImpl, installedVersionImpl,
|
|
143
171
|
autoUpdate, updatePollMs, autoUpdaterFactory,
|
|
144
172
|
superviseDaemon, daemonPollMs, daemonSupervisorFactory, daemonVersionImpl,
|
|
173
|
+
selfRestart, selfRestartCooldownMs, selfRestartDelayMs, restartSelfImpl, exitImpl, scheduleImpl,
|
|
174
|
+
selfVersion,
|
|
145
175
|
});
|
|
146
176
|
this.autoUpdater = null;
|
|
147
177
|
this.updateTimer = null;
|
|
@@ -149,6 +179,10 @@ export class WorkspaceSupervisor {
|
|
|
149
179
|
this.daemonTimer = null;
|
|
150
180
|
this._daemonTicking = false;
|
|
151
181
|
this.daemonRuntimeFile = path.join(globalDir, 'daemon-runtime.json');
|
|
182
|
+
// Persists the last self-restart time so a fresh post-re-exec supervisor
|
|
183
|
+
// honours the cooldown too (belt-and-suspenders against a restart loop).
|
|
184
|
+
this.selfRestartFile = path.join(globalDir, 'self-restart.json');
|
|
185
|
+
this._selfRestartScheduled = false;
|
|
152
186
|
this.logFile = path.join(globalDir, 'supervisor.log');
|
|
153
187
|
this.serverLogFile = path.join(globalDir, 'server.out.log');
|
|
154
188
|
this.lockFile = path.join(globalDir, 'supervisor.lock');
|
|
@@ -243,6 +277,10 @@ export class WorkspaceSupervisor {
|
|
|
243
277
|
this.restartChild();
|
|
244
278
|
return 'restart-requested';
|
|
245
279
|
}
|
|
280
|
+
// Part-8 backstop: if disk moved ahead of our own code (any update path),
|
|
281
|
+
// schedule a supervisor self-restart. Side-effect only — never changes the
|
|
282
|
+
// tick decision below (server/daemon healing proceeds as usual meanwhile).
|
|
283
|
+
this.maybeSelfRestartOnDrift();
|
|
246
284
|
if (await this.probeImpl(this.port, this.probeTimeoutMs)) {
|
|
247
285
|
this.backoff = this.backoffStartMs; // healthy → reset backoff
|
|
248
286
|
this.spawnCount = 0; // healthy → not a crash loop
|
|
@@ -347,6 +385,96 @@ export class WorkspaceSupervisor {
|
|
|
347
385
|
return true;
|
|
348
386
|
}
|
|
349
387
|
|
|
388
|
+
/** The last self-restart time (epoch ms), or 0. Used for the loop-guard cooldown. */
|
|
389
|
+
readLastSelfRestart() {
|
|
390
|
+
try { return Number(JSON.parse(fs.readFileSync(this.selfRestartFile, 'utf8')).at) || 0; }
|
|
391
|
+
catch { return 0; }
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
writeLastSelfRestart(at) {
|
|
395
|
+
try {
|
|
396
|
+
fs.mkdirSync(this.globalDir, { recursive: true });
|
|
397
|
+
fs.writeFileSync(this.selfRestartFile, JSON.stringify({ at }));
|
|
398
|
+
} catch { /* best-effort */ }
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
/**
|
|
402
|
+
* Schedule a supervisor self-restart so freshly-installed SUPERVISOR code loads
|
|
403
|
+
* (the Part-8 stale-process fix). Called from the AutoUpdater's onUpdate hook
|
|
404
|
+
* AFTER an update installed + restarted the server child + verified it healthy —
|
|
405
|
+
* so a bad release has already rolled back before we re-exec ourselves. Guarded
|
|
406
|
+
* three ways against a restart loop: the kill switch, a once-per-process flag,
|
|
407
|
+
* and a persisted cooldown (survives the re-exec). Returns a status string
|
|
408
|
+
* ('scheduled' | 'disabled' | 'already' | 'cooldown') for tests/logging. The
|
|
409
|
+
* actual restart runs on a short delay so the triggering tick unwinds first.
|
|
410
|
+
*/
|
|
411
|
+
scheduleSelfRestart(reason) {
|
|
412
|
+
if (!this.selfRestart) return 'disabled';
|
|
413
|
+
if (this._selfRestartScheduled) return 'already';
|
|
414
|
+
const now = this.nowImpl();
|
|
415
|
+
const last = this.readLastSelfRestart();
|
|
416
|
+
if (last && now - last < this.selfRestartCooldownMs) {
|
|
417
|
+
this.log(`self-restart skipped (cooldown, last ${Math.round((now - last) / 1000)}s ago) — ${reason}`);
|
|
418
|
+
return 'cooldown';
|
|
419
|
+
}
|
|
420
|
+
this._selfRestartScheduled = true;
|
|
421
|
+
this.writeLastSelfRestart(now);
|
|
422
|
+
this.log(`self-restart scheduled in ${this.selfRestartDelayMs}ms — ${reason}`);
|
|
423
|
+
this.scheduleImpl(() => {
|
|
424
|
+
this._performSelfRestart(reason).catch((e) => this.log(`self-restart error: ${e?.message || e}`));
|
|
425
|
+
}, this.selfRestartDelayMs);
|
|
426
|
+
return 'scheduled';
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/**
|
|
430
|
+
* Carry out the self-restart. On mac/Linux the service manager kills+relaunches
|
|
431
|
+
* us (we just issue the command and get SIGTERM'd → our exit handler releases the
|
|
432
|
+
* lock). On Windows restartSelf spawned a hidden successor and returns
|
|
433
|
+
* willExit:true — we then release the lock (via stop()) and exit so the successor
|
|
434
|
+
* can take it. A non-managed run reports restarted:false and we stay up on the
|
|
435
|
+
* old code (no worse than before this feature). Never throws.
|
|
436
|
+
*/
|
|
437
|
+
async _performSelfRestart(reason) {
|
|
438
|
+
this.log(`self-restart now — ${reason}`);
|
|
439
|
+
let r;
|
|
440
|
+
try {
|
|
441
|
+
r = await this.restartSelfImpl({ dir: this.globalDir, port: this.port });
|
|
442
|
+
} catch (e) {
|
|
443
|
+
this.log(`self-restart impl error: ${e?.message || e}`);
|
|
444
|
+
return { restarted: false, error: e?.message || String(e) };
|
|
445
|
+
}
|
|
446
|
+
this.log(`self-restart result: ${JSON.stringify(r)}`);
|
|
447
|
+
if (r && r.willExit) {
|
|
448
|
+
this.stop(); // clears timers + releases the lock so the successor can take it
|
|
449
|
+
this.exitImpl(0);
|
|
450
|
+
}
|
|
451
|
+
return r;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
/**
|
|
455
|
+
* Backstop for the Part-8 gap on EVERY update path, not just our own auto-
|
|
456
|
+
* updater: when the version installed on disk no longer matches the code THIS
|
|
457
|
+
* supervisor is running, the supervisor is stale → schedule a self-restart.
|
|
458
|
+
* RC1b already restarts the stale server child and daemonTick recycles the
|
|
459
|
+
* stale daemon; this is the missing third leg (the supervisor itself), so an
|
|
460
|
+
* operator `update-now` / CLI `update apply` / manual `npm i -g` also lands new
|
|
461
|
+
* supervisor code with no reboot. Skipped while OUR auto-updater is mid-flight
|
|
462
|
+
* so the rollback window is respected (that path self-restarts via the onUpdate
|
|
463
|
+
* hook, only after verify succeeds). Cheap (an in-memory compare guarding a disk
|
|
464
|
+
* read) and idempotent (scheduleSelfRestart de-dupes). Never throws.
|
|
465
|
+
*/
|
|
466
|
+
maybeSelfRestartOnDrift() {
|
|
467
|
+
if (!this.selfRestart || !this.selfVersion) return false;
|
|
468
|
+
if (this._selfRestartScheduled) return false;
|
|
469
|
+
if (this.autoUpdater && this.autoUpdater.inProgress) return false; // respect rollback window
|
|
470
|
+
let installed = null;
|
|
471
|
+
try { installed = this.installedVersionImpl(); } catch { return false; }
|
|
472
|
+
if (!installed || installed === this.selfVersion) return false;
|
|
473
|
+
this.log(`supervisor version drift: running=${this.selfVersion} installed=${installed} — self-restarting`);
|
|
474
|
+
this.scheduleSelfRestart(`supervisor drift ${this.selfVersion}→${installed}`);
|
|
475
|
+
return true;
|
|
476
|
+
}
|
|
477
|
+
|
|
350
478
|
/** Build the AutoUpdater bound to this supervisor. Separated for the test seam. */
|
|
351
479
|
async buildAutoUpdater() {
|
|
352
480
|
if (this.autoUpdaterFactory) return this.autoUpdaterFactory(this);
|
|
@@ -362,7 +490,16 @@ export class WorkspaceSupervisor {
|
|
|
362
490
|
nowImpl: this.nowImpl,
|
|
363
491
|
env: this.env,
|
|
364
492
|
logImpl: (m) => this.log(m),
|
|
365
|
-
onUpdate: (rec) =>
|
|
493
|
+
onUpdate: (rec) => {
|
|
494
|
+
this.log(`auto-update result: ${rec.from || '?'}→${rec.to} ${rec.status}`);
|
|
495
|
+
// A genuine version change landed healthy → restart the supervisor itself
|
|
496
|
+
// so its own new code loads (Part-8 stale-process fix). Guarded against
|
|
497
|
+
// loops inside scheduleSelfRestart. Fires only on a real bump (to≠from),
|
|
498
|
+
// never on rollback/failure (those statuses aren't 'ok').
|
|
499
|
+
if (rec.status === 'ok' && rec.to && rec.from && rec.to !== rec.from) {
|
|
500
|
+
this.scheduleSelfRestart(`auto-update ${rec.from}→${rec.to}`);
|
|
501
|
+
}
|
|
502
|
+
},
|
|
366
503
|
});
|
|
367
504
|
}
|
|
368
505
|
|
|
@@ -469,7 +606,7 @@ export class WorkspaceSupervisor {
|
|
|
469
606
|
process.on('exit', () => this.releaseLock());
|
|
470
607
|
process.on('SIGTERM', () => process.exit(0));
|
|
471
608
|
process.on('SIGINT', () => process.exit(0));
|
|
472
|
-
this.log(`supervisor start pid=${process.pid} watching http://127.0.0.1:${this.port}/api/health (workspace=${this.workspaceDir})`);
|
|
609
|
+
this.log(`supervisor start pid=${process.pid} v${this.selfVersion || '?'} watching http://127.0.0.1:${this.port}/api/health (workspace=${this.workspaceDir})`);
|
|
473
610
|
this.timer = setInterval(() => { this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`)); }, this.pollMs);
|
|
474
611
|
this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`));
|
|
475
612
|
|