@shogo-ai/worker 1.9.9 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,355 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (C) 2026 Shogo Technologies, Inc.
3
+ //
4
+ // Local SQLite migration "doctor" for the Shogo desktop app.
5
+ //
6
+ // The packaged desktop app stores its database at
7
+ // `<userData>/data/shogo.db` and applies schema with
8
+ // `prisma migrate deploy` on every launch. When a migration throws
9
+ // mid-way it leaves a row in `_prisma_migrations` with
10
+ // `finished_at = NULL`, and Prisma's P3009 check then refuses to run
11
+ // ANY further migrations — the app is wedged until that ledger row is
12
+ // cleared. The desktop app surfaces a recovery dialog when this happens
13
+ // on boot (see apps/desktop/src/db-recovery.ts), but a user whose app
14
+ // won't open, or who is being walked through a fix by support, has no
15
+ // way to trigger that repair from a terminal.
16
+ //
17
+ // `shogo doctor` (this module) is that terminal entry point. It performs
18
+ // the SAME safe, dependency-free repair the desktop dialog does:
19
+ //
20
+ // 1. detectFailedMigrations() — find stuck `_prisma_migrations` rows.
21
+ // 2. backupDatabase() — snapshot `shogo.db` (+ -wal/-shm)
22
+ // before touching anything.
23
+ // 3. repairFailedMigrations() — delete the stuck rows (equivalent to
24
+ // `prisma migrate resolve --rolled-back`).
25
+ //
26
+ // It deliberately does NOT re-run `prisma migrate deploy` itself — the
27
+ // CLI doesn't ship the Prisma schema/migration history that lives inside
28
+ // the desktop app bundle. Instead it clears the wedge and tells the user
29
+ // to relaunch Shogo, which re-applies migrations on its next boot.
30
+ //
31
+ // Why shell out to bun rather than link a SQLite driver
32
+ // -----------------------------------------------------
33
+ // The worker CLI runs under Node (npm install) OR Bun (tarball release),
34
+ // and we don't want a native `better-sqlite3` build step. The desktop
35
+ // app already ships a `bun` binary with `bun:sqlite` statically linked,
36
+ // and that's the exact SQLite version Prisma's bun-sqlite adapter uses at
37
+ // runtime — so running `bun -e "<small script>"` against it is both
38
+ // dependency-free and driver-version-matched. This mirrors
39
+ // apps/desktop/src/db-recovery.ts (kept as a separate copy there because
40
+ // the Electron main process is bundled in a different runtime context).
41
+
42
+ import { execFileSync } from 'node:child_process';
43
+ import { existsSync, copyFileSync } from 'node:fs';
44
+ import path from 'node:path';
45
+ import { homedir, platform } from 'node:os';
46
+
47
/**
 * One stuck row of Prisma's `_prisma_migrations` ledger, in the shape the
 * detection query emits it.
 */
export interface FailedMigration {
  /** Value of the row's `migration_name` column. */
  name: string;
  /** Value of the row's `started_at` column: epoch milliseconds of the attempt. */
  startedAt: number;
  /** Leading 600 characters of the row's `logs` column (empty when NULL), for display. */
  errorExcerpt: string;
}
54
+
55
/**
 * Execute a one-shot script with `<bunPath> -e` and decode its stdout as JSON.
 *
 * The database path is handed to the child through the `DBP` environment
 * variable rather than spliced into the script, so paths containing spaces
 * or quotes never need shell/JS escaping. The child gets no stdin, its
 * stdout/stderr are captured, and it is killed after 5 seconds.
 *
 * Nothing is swallowed here: a broken recovery layer must surface its own
 * defect instead of a misleading "database looks fine".
 *
 * @param bunPath Executable to run (expected to be a `bun` binary).
 * @param dbPath  Database path exposed to the script as `process.env.DBP`.
 * @param script  Source text passed to `-e`; must print one JSON document.
 * @returns The parsed stdout, typed as `T` (unchecked: the caller owns the shape).
 * @throws If the process cannot be spawned, exits non-zero or times out
 *         (error from `execFileSync`), if stdout is empty, or if stdout is
 *         not valid JSON (the error names the DB and quotes the output).
 */
function runBunScript<T>(bunPath: string, dbPath: string, script: string): T {
  const stdout = execFileSync(bunPath, ['-e', script], {
    env: { ...process.env, DBP: dbPath },
    encoding: 'utf-8',
    stdio: ['ignore', 'pipe', 'pipe'],
    timeout: 5000,
  });

  const payload = stdout.trim();
  if (!payload) {
    throw new Error(`bun script returned empty output for ${dbPath}`);
  }

  try {
    return JSON.parse(payload) as T;
  } catch (err) {
    // A bare SyntaxError ("Unexpected token ...") gives no hint which DB or
    // which output was involved; re-throw with that context attached.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `bun script returned non-JSON output for ${dbPath} (${reason}): ${payload.slice(0, 200)}`,
    );
  }
}
76
+
77
/**
 * List the `_prisma_migrations` rows that are stuck: `finished_at` is NULL
 * (the migration never completed) and `rolled_back_at` is NULL (nobody has
 * resolved it yet), ordered by `started_at`.
 *
 * The query runs in a child `bun` process that opens the DB read-only.
 *
 * @param bunPath Path of the `bun` binary used to run the query script.
 * @param dbPath  Path of the SQLite database to inspect.
 * @returns The stuck rows; `[]` when the DB file is missing, the ledger
 *          table does not exist, or no row matches.
 * @throws Whatever `runBunScript` throws when the child fails.
 */
export function detectFailedMigrations(bunPath: string, dbPath: string): FailedMigration[] {
  const detectionScript = `
    import { Database } from 'bun:sqlite';
    try {
      const db = new Database(process.env.DBP, { readonly: true });
      const hasTable = db.query("SELECT name FROM sqlite_master WHERE type='table' AND name='_prisma_migrations'").get();
      if (!hasTable) { console.log('[]'); process.exit(0); }
      const rows = db
        .query("SELECT migration_name as name, started_at as startedAt, substr(coalesce(logs, ''), 1, 600) as errorExcerpt FROM _prisma_migrations WHERE finished_at IS NULL AND rolled_back_at IS NULL ORDER BY started_at")
        .all();
      console.log(JSON.stringify(rows));
    } catch (e) {
      console.error(String(e?.stack || e));
      process.exit(2);
    }
  `;

  // No file means nothing to inspect, so don't spawn anything.
  return existsSync(dbPath)
    ? runBunScript<FailedMigration[]>(bunPath, dbPath, detectionScript)
    : [];
}
104
+
105
/**
 * Copy the database file to a timestamped sibling
 * (`<name>.bak-<ISO timestamp with ':' and '.' replaced by '-'>`), together
 * with its `-wal` / `-shm` sidecars when those exist.
 *
 * Plain file copies are used instead of SQLite's online backup: the DB is
 * not open at this point, and a byte copy also works on a file SQLite
 * itself would refuse to open.
 *
 * @param dbPath Path of the database to snapshot.
 * @returns Path of the copy of the main database file.
 * @throws If the database is missing or any copy fails. The caller MUST
 *         treat that as "do not proceed with repair".
 */
export function backupDatabase(dbPath: string): string {
  if (!existsSync(dbPath)) {
    throw new Error(`Database does not exist at ${dbPath} — refusing to back up nothing`);
  }

  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const backupPath = path.join(
    path.dirname(dbPath),
    `${path.basename(dbPath)}.bak-${timestamp}`,
  );
  copyFileSync(dbPath, backupPath);

  // Sidecars are optional; copy each one that is present next to the DB.
  ['-wal', '-shm'].forEach((suffix) => {
    const source = `${dbPath}${suffix}`;
    if (!existsSync(source)) return;
    copyFileSync(source, `${backupPath}${suffix}`);
  });

  return backupPath;
}
131
+
132
/**
 * Remove the named stuck rows from `_prisma_migrations`: the equivalent of
 * `prisma migrate resolve --rolled-back <name>` for each one, without
 * needing the schema-engine binary.
 *
 * The DELETE re-checks `finished_at IS NULL AND rolled_back_at IS NULL`, so
 * a stale name list can never remove a row that has since completed or
 * been resolved. Names travel into the child script as a JSON literal and
 * are bound as statement parameters, never concatenated into SQL.
 *
 * Call `backupDatabase()` first. After this, the next `prisma migrate
 * deploy` (on the desktop app's next launch) re-attempts the migration.
 *
 * @param bunPath        Path of the `bun` binary used to run the script.
 * @param dbPath         Path of the SQLite database to modify.
 * @param migrationNames Names of the rows to clear.
 * @returns Number of rows deleted (0 without touching the DB when the list is empty).
 * @throws If the database file is missing, or whatever `runBunScript` throws.
 */
export function repairFailedMigrations(
  bunPath: string,
  dbPath: string,
  migrationNames: string[],
): number {
  if (migrationNames.length === 0) {
    return 0;
  }
  if (!existsSync(dbPath)) {
    throw new Error(`Cannot repair: database does not exist at ${dbPath}`);
  }

  const namesJson = JSON.stringify(migrationNames);
  const repairScript = `
    import { Database } from 'bun:sqlite';
    const names = ${namesJson};
    if (!Array.isArray(names) || names.some(n => typeof n !== 'string')) {
      console.error('Invalid migration name list');
      process.exit(2);
    }
    try {
      const db = new Database(process.env.DBP, { create: false, readwrite: true });
      const placeholders = names.map(() => '?').join(',');
      const stmt = db.prepare(
        \`DELETE FROM _prisma_migrations WHERE migration_name IN (\${placeholders}) AND finished_at IS NULL AND rolled_back_at IS NULL\`
      );
      const result = stmt.run(...names);
      console.log(JSON.stringify({ deleted: Number(result.changes) }));
    } catch (e) {
      console.error(String(e?.stack || e));
      process.exit(3);
    }
  `;

  const { deleted } = runBunScript<{ deleted: number }>(bunPath, dbPath, repairScript);
  return deleted;
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // Orchestrator
181
+ // ---------------------------------------------------------------------------
182
+
183
/**
 * Outcome of a doctor run:
 *  - `healthy`     — no stuck migration rows were found.
 *  - `no-database` — the database file does not exist.
 *  - `repaired`    — stuck rows were found and none remain after repair.
 *  - `failed`      — stuck rows were found and some still remain.
 */
export type DoctorStatus = 'healthy' | 'no-database' | 'repaired' | 'failed';

/** Report returned by `runDatabaseDoctor`. */
export interface DoctorResult {
  /** Overall outcome of the run. */
  status: DoctorStatus;
  /** Stuck migrations found before any repair was attempted. */
  detected: FailedMigration[];
  /** Where the pre-repair backup was written; absent when no backup was taken. */
  backupPath?: string;
  /** Names of the ledger rows that were actually cleared. */
  cleared: string[];
  /** Migrations still stuck after the repair attempt (empty on success). */
  remaining: FailedMigration[];
  /** Human-readable summary of what happened. */
  message: string;
}
198
+
199
/** Inputs accepted by `runDatabaseDoctor`. */
export interface DoctorOptions {
  /** Path of the `bun` binary used to run the SQLite scripts. */
  bunPath: string;
  /** Path of the SQLite database to inspect and repair. */
  dbPath: string;
  /** When true, repair without taking a backup first. Defaults to false; discouraged. */
  skipBackup?: boolean;
  /** Receives one progress line per step. Defaults to a no-op. */
  log?: (line: string) => void;
}
207
+
208
/**
 * Run the full safe repair sequence against a local SQLite DB:
 * detect → backup → clear stuck ledger rows → re-detect.
 *
 * Idempotent: a missing or healthy DB is a no-op. Never re-runs
 * `migrate deploy`; that is the desktop app's job on its next launch.
 *
 * @param opts See {@link DoctorOptions}.
 * @returns A {@link DoctorResult}. `cleared` lists exactly the detected
 *          migration names that are no longer reported as failed by the
 *          post-repair re-detection, so on a partial repair it never names
 *          a migration that is still stuck.
 * @throws Propagates errors from detection, backup and repair. A backup
 *         failure aborts the run before anything in the DB is modified.
 */
export function runDatabaseDoctor(opts: DoctorOptions): DoctorResult {
  const log = opts.log ?? (() => {});
  const { bunPath, dbPath } = opts;

  if (!existsSync(dbPath)) {
    return {
      status: 'no-database',
      detected: [],
      cleared: [],
      remaining: [],
      message: `No database found at ${dbPath}. Nothing to repair — launch Shogo once to create it.`,
    };
  }

  const detected = detectFailedMigrations(bunPath, dbPath);
  if (detected.length === 0) {
    return {
      status: 'healthy',
      detected: [],
      cleared: [],
      remaining: [],
      message: 'No failed migrations detected — the local database looks healthy.',
    };
  }

  const names = detected.map((m) => m.name);
  log(`Found ${detected.length} failed migration(s): ${names.join(', ')}`);

  // Snapshot before mutating anything; a throw here stops the repair.
  let backupPath: string | undefined;
  if (!opts.skipBackup) {
    backupPath = backupDatabase(dbPath);
    log(`Backed up database to ${backupPath}`);
  }

  const deleted = repairFailedMigrations(bunPath, dbPath, names);
  log(`Cleared ${deleted} failed migration row(s).`);

  // Re-detect rather than trusting the delete count: it tells us WHICH
  // migrations are gone. The count alone cannot, because the DELETE does
  // not process names in list order.
  const remaining = detectFailedMigrations(bunPath, dbPath);
  const stillFailed = new Set(remaining.map((m) => m.name));
  const cleared = names.filter((name) => !stillFailed.has(name));

  const status: DoctorStatus = remaining.length === 0 ? 'repaired' : 'failed';
  const message =
    status === 'repaired'
      ? 'Cleared the failed migration record. Relaunch Shogo to re-apply migrations cleanly.'
      : `Repair incomplete — ${remaining.length} migration(s) still failed: ${remaining
          .map((m) => m.name)
          .join(', ')}.`;

  return {
    status,
    detected,
    backupPath,
    cleared,
    remaining,
    message,
  };
}
269
+
270
+ // ---------------------------------------------------------------------------
271
+ // Path / binary resolution (for the standalone CLI)
272
+ // ---------------------------------------------------------------------------
273
+
274
/**
 * Resolve the desktop app's per-user data directory, mirroring Electron's
 * `app.getPath('userData')` plus the `data/` subdir used by
 * apps/desktop/src/paths.ts. `productName` is "Shogo".
 *
 *   macOS:   ~/Library/Application Support/Shogo/data
 *   Windows: %APPDATA%/Shogo/data   (falls back to ~/AppData/Roaming)
 *   Linux:   $XDG_CONFIG_HOME/Shogo/data   (falls back to ~/.config)
 *
 * An environment variable that is set but EMPTY is treated as unset, as
 * the XDG Base Directory spec requires. Otherwise the result would
 * silently become the relative path `Shogo/data`, resolved against
 * whatever the current working directory happens to be.
 *
 * @returns Path of the data directory; it is not checked for existence.
 */
export function resolveDesktopDataDir(): string {
  const home = homedir();
  let appData: string;
  switch (platform()) {
    case 'darwin':
      appData = path.join(home, 'Library', 'Application Support');
      break;
    case 'win32':
      // `||` (not `??`) so an empty string falls through to the default.
      appData = process.env.APPDATA || path.join(home, 'AppData', 'Roaming');
      break;
    default:
      appData = process.env.XDG_CONFIG_HOME || path.join(home, '.config');
      break;
  }
  return path.join(appData, 'Shogo', 'data');
}
296
+
297
/**
 * Default location of the desktop app's local SQLite database: `shogo.db`
 * inside the directory returned by `resolveDesktopDataDir()`.
 */
export function resolveDesktopDbPath(): string {
  const dataDir = resolveDesktopDataDir();
  return path.join(dataDir, 'shogo.db');
}
301
+
302
/**
 * Places where an installed desktop app may have put its bundled `bun`
 * binary (`resources/bun/`). Best-effort and platform-specific: nothing is
 * checked for existence here; the caller filters out missing entries.
 *
 * @returns Candidate paths in priority order; empty on platforms with no
 *          known fixed install location (including Windows).
 */
function bundledBunCandidates(): string[] {
  const os = platform();
  const binaryName = os === 'win32' ? 'bun.exe' : 'bun';

  switch (os) {
    case 'darwin':
      // System-wide install first, then the per-user Applications folder.
      return [
        path.join('/Applications', 'Shogo.app', 'Contents', 'Resources', 'bun', binaryName),
        path.join(homedir(), 'Applications', 'Shogo.app', 'Contents', 'Resources', 'bun', binaryName),
      ];
    case 'linux':
      return [
        path.join('/opt', 'Shogo', 'resources', 'bun', binaryName),
        path.join('/usr', 'lib', 'shogo', 'resources', 'bun', binaryName),
      ];
    default:
      // Windows installs under a version-stamped Squirrel dir
      // (%LOCALAPPDATA%/shogo/app-<ver>/resources/bun/bun.exe) which we can't
      // resolve without globbing; rely on --bun / PATH there.
      return [];
  }
}
328
+
329
/**
 * Probe a binary by running `<bunPath> --version` (output discarded,
 * 5 second timeout).
 *
 * @returns `true` when the command ran and exited cleanly; `false` on any
 *          failure (not found, not executable, non-zero exit, timeout).
 */
function bunIsUsable(bunPath: string): boolean {
  let usable = true;
  try {
    execFileSync(bunPath, ['--version'], { stdio: 'ignore', timeout: 5000 });
  } catch {
    usable = false;
  }
  return usable;
}
338
+
339
/**
 * Pick the `bun` binary the repair scripts should run under. Sources are
 * tried in this order and the first hit wins:
 *   1. `override` (the `--bun` flag). When given it is the ONLY source
 *      considered; an unusable override yields `null`, not a fallback.
 *   2. the bun executing this CLI, when running under bun (tarball release)
 *   3. the desktop app's bundled bun, if present on disk and usable
 *   4. `bun` resolved through PATH
 *
 * @param override Explicit binary path supplied by the user.
 * @returns The chosen binary path (or the bare name `bun`), or `null` when
 *          nothing usable was found.
 */
export function resolveBunBinary(override?: string): string | null {
  if (override) {
    if (bunIsUsable(override)) return override;
    return null;
  }

  // Already running under bun: reuse that executable without probing it.
  if (process.versions.bun && process.execPath) {
    return process.execPath;
  }

  const bundled = bundledBunCandidates().find(
    (candidate) => existsSync(candidate) && bunIsUsable(candidate),
  );
  if (bundled !== undefined) return bundled;

  return bunIsUsable('bun') ? 'bun' : null;
}
@@ -20,7 +20,7 @@ import { PID_FILE, WORKER_LOG, WORKER_ERR, ensureHome } from './paths.ts';
20
20
 
21
21
  export interface SpawnOpts {
22
22
  entry: string;
23
- runner: 'bun' | 'node';
23
+ runner: 'bun' | 'node' | 'tsx';
24
24
  env: NodeJS.ProcessEnv;
25
25
  cwd: string;
26
26
  detach?: boolean;
@@ -60,7 +60,7 @@ export type Channel = 'stable' | 'beta' | 'nightly';
60
60
  * Layout assumed by `buildAssetUrls()`:
61
61
  * ${baseUrl}/v${version}/${assetName}
62
62
  */
63
- export const DEFAULT_RELEASES_BASE_URL = 'https://github.com/shogo-ai/shogo/releases/download';
63
+ export const DEFAULT_RELEASES_BASE_URL = 'https://github.com/shogo-labs/shogo-ai/releases/download';
64
64
 
65
65
  export interface InstallOptions {
66
66
  /** Specific version to install (e.g. "0.1.0"). Default: latest in channel. */
@@ -48,9 +48,43 @@ const PORT_RANGE_START = 37100;
48
48
  const PORT_RANGE_END = 37900;
49
49
  const API_PORT_OFFSET = 1; // API server port = agentPort + 1.
50
50
 
51
+ /**
52
+ * Offset (from the agent port) of the workspace preview sidecar base.
53
+ *
54
+ * A workspace runtime serves N attached projects, each with its own preview
55
+ * sidecar (`server.tsx`) on `WORKSPACE_API_PORT_BASE + projectIndex`. We anchor
56
+ * that base at `agentPort + 2` (agentPort=+0, its API/skill server=+1) so every
57
+ * runtime gets a DISTINCT sidecar range. Before warm-multiple, only one runtime
58
+ * ran at a time and all of them could share the fixed default base (3101); now
59
+ * that several runtimes stay warm concurrently they were all binding 3101 and
60
+ * crash-looping (force-killing each other's leaked sidecars), which SIGKILLed
61
+ * the agent-runtime and restart-looped it.
62
+ */
63
+ const PREVIEW_API_BASE_OFFSET = 2;
64
+
65
+ /**
66
+ * Contiguous ports reserved per runtime: agent(+0), API/skill server(+1) and
67
+ * the preview sidecars (+2 … +RUNTIME_PORT_BLOCK-1). Reserving the whole block
68
+ * (rather than just the two eagerly-bound ports) guarantees no OTHER runtime's
69
+ * block overlaps this one's sidecar range. 16 supports up to 14 attached
70
+ * projects per workspace; the 800-port range still fits 50 such blocks (>> the
71
+ * default maxRuntimes of 10).
72
+ */
73
+ const RUNTIME_PORT_BLOCK = 16;
74
+
51
75
  /** Default idle eviction window — unused runtimes get killed after this. */
52
76
  const RUNTIME_IDLE_MS = 15 * 60 * 1000;
53
77
 
78
+ /**
79
+ * A runtime touched within this window is treated as "actively in use"
80
+ * (likely mid-stream — the agent-proxy/ai-proxy refresh `lastUsedAt` on
81
+ * every forwarded chunk) and is never picked as an LRU eviction victim
82
+ * by {@link WorkerRuntimeManager.enforceMaxRuntimes}, even when the cap
83
+ * is exceeded. Better to briefly run one over the cap than to SIGKILL a
84
+ * live chat stream out from under the user.
85
+ */
86
+ const STREAM_ACTIVE_WINDOW_MS = 30 * 1000;
87
+
54
88
  /** Restart backoff bounds. */
55
89
  const RESTART_BACKOFF_BASE_MS = 1_000;
56
90
  const RESTART_BACKOFF_MAX_MS = 60_000;
@@ -222,6 +256,16 @@ export interface WorkerRuntimeManagerOptions {
222
256
  * Cloud workers leave this unset so the default still fires.
223
257
  */
224
258
  idleMs?: number;
259
+ /**
260
+ * Hard ceiling on the number of concurrently-running runtimes. Once
261
+ * exceeded, `ensureRunning` LRU-evicts the least-recently-used slot
262
+ * that has not been touched within {@link STREAM_ACTIVE_WINDOW_MS}
263
+ * (i.e. is not mid-stream). Pass `0`, a negative number, or a
264
+ * non-finite value to disable the cap (the historical behaviour —
265
+ * runtimes were then bounded only by idle eviction). Defaults to
266
+ * disabled when unset.
267
+ */
268
+ maxRuntimes?: number;
225
269
  /** Optional logger. Defaults to console. */
226
270
  logger?: Pick<Console, 'log' | 'warn' | 'error'>;
227
271
  /** Working directory for spawned runtimes. Defaults to OS tmpdir/shogo-runtime. */
@@ -629,12 +673,71 @@ export class WorkerRuntimeManager implements RuntimeResolver {
629
673
  slot.startPromise = this.doStart(slot);
630
674
  try {
631
675
  const r = await slot.startPromise;
676
+ // We just brought a (possibly new) runtime up — enforce the hard
677
+ // ceiling now so a busy multi-project session can't accumulate
678
+ // runtimes without bound. Never evicts the one we just started.
679
+ this.enforceMaxRuntimes(projectId);
632
680
  return this.snapshot(r);
633
681
  } finally {
634
682
  slot.startPromise = null;
635
683
  }
636
684
  }
637
685
 
686
/**
 * Bring the number of running runtimes back to at most
 * {@link WorkerRuntimeManagerOptions.maxRuntimes} by stopping the
 * least-recently-used ones. Does nothing when the cap is unset,
 * non-finite, zero or negative.
 *
 * Only slots whose status is `running` count towards the cap. Among
 * those, a slot is never chosen as a victim when it is:
 *   - the slot identified by `keepProjectId` (the one just started), or
 *   - touched within the last {@link STREAM_ACTIVE_WINDOW_MS}, which is
 *     treated as mid-stream; a live chat is never cut.
 *
 * When too few victims are eligible the count is left over the cap and a
 * log line says so; a later spawn or idle reap gets another chance.
 *
 * Eviction is fire-and-forget: `stop()` is started but not awaited, so the
 * caller's own spawn does not wait on another runtime's teardown. A
 * rejected stop is logged as a warning.
 *
 * @param keepProjectId Project whose runtime must survive this pass.
 */
private enforceMaxRuntimes(keepProjectId: string): void {
  const cap = this.opts.maxRuntimes;
  if (cap == null || !Number.isFinite(cap) || cap <= 0) return;

  const now = Date.now();
  const running = Array.from(this.runtimes.values()).filter((r) => r.status === 'running');
  const excess = running.length - cap;
  if (excess <= 0) return;

  // Eligible victims, least recently used first.
  const evictable = running
    .filter((r) => r.projectId !== keepProjectId)
    .filter((r) => now - r.lastUsedAt >= STREAM_ACTIVE_WINDOW_MS)
    .sort((a, b) => a.lastUsedAt - b.lastUsedAt);

  const victims = evictable.slice(0, excess);
  for (const victim of victims) {
    const idleMs = now - victim.lastUsedAt;
    this.log.log(
      `[WorkerRuntimeManager] maxRuntimes=${cap} exceeded (${running.length} running) — ` +
        `LRU-evicting ${victim.projectId} (idle ${Math.round(idleMs / 1000)}s)`,
    );
    void this.stop(victim.projectId).catch((err: any) => {
      this.log.warn(
        `[WorkerRuntimeManager] maxRuntimes eviction of ${victim.projectId} failed: ${err?.message ?? err}`,
      );
    });
  }

  const overBy = excess - victims.length;
  if (overBy > 0) {
    this.log.log(
      `[WorkerRuntimeManager] maxRuntimes=${cap} still exceeded by ${overBy} after eviction pass — ` +
        `remaining over-cap slots are mid-stream; will retry on next spawn / idle reap`,
    );
  }
}
740
+
638
741
  /**
639
742
  * Public entry point for tests + the `worker start` command to pre-warm
640
743
  * a project's workspace without spawning anything. Internally idempotent.
@@ -1170,6 +1273,12 @@ export class WorkerRuntimeManager implements RuntimeResolver {
1170
1273
  PORT: String(slot.agentPort),
1171
1274
  API_SERVER_PORT: String(slot.apiServerPort),
1172
1275
  SKILL_SERVER_PORT: String(slot.apiServerPort),
1276
+ // Per-runtime base for workspace preview sidecars (server.tsx). Anchored
1277
+ // at agentPort+2 so each warm runtime owns a distinct sidecar range and
1278
+ // they can't all collide on the fixed default (3101) — the cause of the
1279
+ // preview-manager crash-loop / agent-runtime SIGKILL restart storm when
1280
+ // multiple projects are kept warm at once.
1281
+ WORKSPACE_API_PORT_BASE: String(slot.agentPort + PREVIEW_API_BASE_OFFSET),
1173
1282
  NODE_ENV: 'production',
1174
1283
  SHOGO_CLOUD_URL: cfg.cloudUrl,
1175
1284
  SHOGO_API_URL: cfg.cloudUrl,
@@ -1355,12 +1464,23 @@ export class WorkerRuntimeManager implements RuntimeResolver {
1355
1464
  const maxAttempts = Math.min(range, 50);
1356
1465
  for (let i = 0; i < maxAttempts; i++) {
1357
1466
  const candidate = PORT_RANGE_START + Math.floor(Math.random() * range);
1358
- if (this.usedPorts.has(candidate) || this.usedPorts.has(candidate + API_PORT_OFFSET)) continue;
1467
+ // Reserve a contiguous per-runtime block so the agent port, its API
1468
+ // server AND every preview sidecar (WORKSPACE_API_PORT_BASE + idx) live
1469
+ // in a range that no other warm runtime can overlap.
1470
+ if (candidate + RUNTIME_PORT_BLOCK - 1 > PORT_RANGE_END) continue;
1471
+ let blockFree = true;
1472
+ for (let off = 0; off < RUNTIME_PORT_BLOCK; off++) {
1473
+ if (this.usedPorts.has(candidate + off)) { blockFree = false; break; }
1474
+ }
1475
+ if (!blockFree) continue;
1476
+ // Liveness-probe only the two ports we bind eagerly (agent + its API
1477
+ // server). The sidecar ports are bound lazily by the agent-runtime and
1478
+ // guarded by its own leaked-process force-kill, so probing the whole
1479
+ // block here would just slow allocation down.
1359
1480
  const agentInUse = await this.isPortListening(candidate);
1360
1481
  const apiInUse = await this.isPortListening(candidate + API_PORT_OFFSET);
1361
1482
  if (agentInUse || apiInUse) continue;
1362
- this.usedPorts.add(candidate);
1363
- this.usedPorts.add(candidate + API_PORT_OFFSET);
1483
+ for (let off = 0; off < RUNTIME_PORT_BLOCK; off++) this.usedPorts.add(candidate + off);
1364
1484
  return candidate;
1365
1485
  }
1366
1486
  throw new Error(
@@ -1370,8 +1490,7 @@ export class WorkerRuntimeManager implements RuntimeResolver {
1370
1490
 
1371
1491
  private releasePort(port: number): void {
1372
1492
  if (!port) return;
1373
- this.usedPorts.delete(port);
1374
- this.usedPorts.delete(port + API_PORT_OFFSET);
1493
+ for (let off = 0; off < RUNTIME_PORT_BLOCK; off++) this.usedPorts.delete(port + off);
1375
1494
  }
1376
1495
 
1377
1496
  private async isPortListening(port: number): Promise<boolean> {
package/src/lib/tunnel.ts CHANGED
@@ -102,7 +102,7 @@ type TunnelWebSocketConstructor = new (url: string, init: TunnelWebSocketInit) =
102
102
 
103
103
  type RuntimeWithBunWebSocketHeaders = typeof globalThis & {
104
104
  Bun?: unknown;
105
- process?: { versions?: { bun?: string } };
105
+ process?: { versions?: { bun?: string; node?: string } };
106
106
  };
107
107
 
108
108
  interface HeartbeatResponse {
@@ -116,8 +116,8 @@ export class TunnelWebSocketHeaderSupportError extends Error {
116
116
  code = 'TUNNEL_WS_HEADERS_UNSUPPORTED' as const;
117
117
  constructor() {
118
118
  super(
119
- 'Tunnel WebSocket auth requires Bun WebSocket header support. ' +
120
- 'This runtime does not advertise Bun, so Authorization headers may be dropped.',
119
+ 'Tunnel WebSocket auth requires a runtime with WebSocket header support (Bun or Node >= 21). ' +
120
+ 'Current runtime does not support WebSocket constructor headers.',
121
121
  );
122
122
  this.name = 'TunnelWebSocketHeaderSupportError';
123
123
  }
@@ -231,7 +231,15 @@ export class WorkerTunnel {
231
231
  private supportsWebSocketConstructorHeaders(
232
232
  runtime: RuntimeWithBunWebSocketHeaders = globalThis as RuntimeWithBunWebSocketHeaders,
233
233
  ): boolean {
234
- return typeof runtime.Bun !== 'undefined' || typeof runtime.process?.versions?.bun === 'string';
234
+ if (typeof runtime.Bun !== 'undefined' || typeof runtime.process?.versions?.bun === 'string') return true;
235
+ // Node 21+ ships WebSocket (via undici) with header support in the constructor.
236
+ // Detect by checking for Node >= 21 (the version that made WebSocket a global).
237
+ const nodeVersion = runtime.process?.versions?.node;
238
+ if (nodeVersion) {
239
+ const major = parseInt(nodeVersion.split('.')[0] ?? '0', 10);
240
+ if (major >= 21) return true;
241
+ }
242
+ return false;
235
243
  }
236
244
 
237
245
  private createTunnelWebSocket(