@pleri/olam-cli 0.1.175 → 0.1.182

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.md +19 -0
  2. package/bin/olam.cjs +22 -0
  3. package/dist/commands/auth.d.ts.map +1 -1
  4. package/dist/commands/auth.js +67 -19
  5. package/dist/commands/auth.js.map +1 -1
  6. package/dist/commands/config.d.ts.map +1 -1
  7. package/dist/commands/config.js +93 -0
  8. package/dist/commands/config.js.map +1 -1
  9. package/dist/commands/destroy.d.ts +41 -0
  10. package/dist/commands/destroy.d.ts.map +1 -1
  11. package/dist/commands/destroy.js +81 -33
  12. package/dist/commands/destroy.js.map +1 -1
  13. package/dist/commands/dispatch-resolve.d.ts +54 -0
  14. package/dist/commands/dispatch-resolve.d.ts.map +1 -0
  15. package/dist/commands/dispatch-resolve.js +105 -0
  16. package/dist/commands/dispatch-resolve.js.map +1 -0
  17. package/dist/commands/dispatch.d.ts.map +1 -1
  18. package/dist/commands/dispatch.js +40 -9
  19. package/dist/commands/dispatch.js.map +1 -1
  20. package/dist/commands/flywheel/index.d.ts.map +1 -1
  21. package/dist/commands/flywheel/index.js +4 -0
  22. package/dist/commands/flywheel/index.js.map +1 -1
  23. package/dist/commands/flywheel/install-sessionstart-hook.d.ts +64 -0
  24. package/dist/commands/flywheel/install-sessionstart-hook.d.ts.map +1 -0
  25. package/dist/commands/flywheel/install-sessionstart-hook.js +197 -0
  26. package/dist/commands/flywheel/install-sessionstart-hook.js.map +1 -0
  27. package/dist/commands/flywheel/k5-validate.d.ts +31 -0
  28. package/dist/commands/flywheel/k5-validate.d.ts.map +1 -1
  29. package/dist/commands/flywheel/k5-validate.js +80 -19
  30. package/dist/commands/flywheel/k5-validate.js.map +1 -1
  31. package/dist/commands/flywheel/session-start.d.ts +26 -0
  32. package/dist/commands/flywheel/session-start.d.ts.map +1 -0
  33. package/dist/commands/flywheel/session-start.js +119 -0
  34. package/dist/commands/flywheel/session-start.js.map +1 -0
  35. package/dist/commands/host-cp.d.ts +0 -3
  36. package/dist/commands/host-cp.d.ts.map +1 -1
  37. package/dist/commands/host-cp.js +27 -2
  38. package/dist/commands/host-cp.js.map +1 -1
  39. package/dist/commands/kg-classify.d.ts.map +1 -1
  40. package/dist/commands/kg-classify.js +20 -0
  41. package/dist/commands/kg-classify.js.map +1 -1
  42. package/dist/commands/kg-doctor.d.ts +67 -6
  43. package/dist/commands/kg-doctor.d.ts.map +1 -1
  44. package/dist/commands/kg-doctor.js +126 -46
  45. package/dist/commands/kg-doctor.js.map +1 -1
  46. package/dist/commands/list.d.ts +27 -0
  47. package/dist/commands/list.d.ts.map +1 -1
  48. package/dist/commands/list.js +67 -19
  49. package/dist/commands/list.js.map +1 -1
  50. package/dist/commands/memory/status.d.ts +18 -0
  51. package/dist/commands/memory/status.d.ts.map +1 -1
  52. package/dist/commands/memory/status.js +38 -2
  53. package/dist/commands/memory/status.js.map +1 -1
  54. package/dist/commands/memory-service-container.d.ts +44 -0
  55. package/dist/commands/memory-service-container.d.ts.map +1 -1
  56. package/dist/commands/memory-service-container.js +49 -0
  57. package/dist/commands/memory-service-container.js.map +1 -1
  58. package/dist/commands/ps.d.ts +32 -0
  59. package/dist/commands/ps.d.ts.map +1 -1
  60. package/dist/commands/ps.js +34 -0
  61. package/dist/commands/ps.js.map +1 -1
  62. package/dist/commands/runbooks.d.ts +32 -0
  63. package/dist/commands/runbooks.d.ts.map +1 -1
  64. package/dist/commands/runbooks.js +79 -22
  65. package/dist/commands/runbooks.js.map +1 -1
  66. package/dist/commands/skills-source.d.ts.map +1 -1
  67. package/dist/commands/skills-source.js +77 -2
  68. package/dist/commands/skills-source.js.map +1 -1
  69. package/dist/commands/upgrade-history.d.ts +0 -2
  70. package/dist/commands/upgrade-history.d.ts.map +1 -1
  71. package/dist/commands/upgrade-history.js +0 -6
  72. package/dist/commands/upgrade-history.js.map +1 -1
  73. package/dist/commands/upgrade-lock.d.ts +0 -9
  74. package/dist/commands/upgrade-lock.d.ts.map +1 -1
  75. package/dist/commands/upgrade-lock.js +1 -1
  76. package/dist/commands/upgrade-lock.js.map +1 -1
  77. package/dist/commands/world-snapshot.d.ts +13 -0
  78. package/dist/commands/world-snapshot.d.ts.map +1 -1
  79. package/dist/commands/world-snapshot.js +81 -1
  80. package/dist/commands/world-snapshot.js.map +1 -1
  81. package/dist/commands/yolo.d.ts +95 -0
  82. package/dist/commands/yolo.d.ts.map +1 -0
  83. package/dist/commands/yolo.js +377 -0
  84. package/dist/commands/yolo.js.map +1 -0
  85. package/dist/image-digests.json +8 -8
  86. package/dist/index.js +3990 -2445
  87. package/dist/index.js.map +1 -1
  88. package/dist/lib/anthropic-base-url-file.d.ts +37 -0
  89. package/dist/lib/anthropic-base-url-file.d.ts.map +1 -0
  90. package/dist/lib/anthropic-base-url-file.js +46 -0
  91. package/dist/lib/anthropic-base-url-file.js.map +1 -0
  92. package/dist/lib/auth-remote.d.ts +9 -17
  93. package/dist/lib/auth-remote.d.ts.map +1 -1
  94. package/dist/lib/auth-remote.js +25 -20
  95. package/dist/lib/auth-remote.js.map +1 -1
  96. package/dist/lib/cf-access-token.d.ts +32 -0
  97. package/dist/lib/cf-access-token.d.ts.map +1 -0
  98. package/dist/lib/cf-access-token.js +52 -0
  99. package/dist/lib/cf-access-token.js.map +1 -0
  100. package/dist/lib/config.d.ts +17 -3
  101. package/dist/lib/config.d.ts.map +1 -1
  102. package/dist/lib/config.js +28 -4
  103. package/dist/lib/config.js.map +1 -1
  104. package/dist/lib/kubectl-context.d.ts +49 -0
  105. package/dist/lib/kubectl-context.d.ts.map +1 -1
  106. package/dist/lib/kubectl-context.js +64 -2
  107. package/dist/lib/kubectl-context.js.map +1 -1
  108. package/dist/lib/upgrade-kubernetes.d.ts +7 -0
  109. package/dist/lib/upgrade-kubernetes.d.ts.map +1 -1
  110. package/dist/lib/upgrade-kubernetes.js +35 -8
  111. package/dist/lib/upgrade-kubernetes.js.map +1 -1
  112. package/dist/mcp-server.js +1470 -991
  113. package/hermes-bundle/version.json +1 -1
  114. package/host-cp/k8s/manifests/45-pvc.yaml +6 -2
  115. package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
  116. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  117. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  118. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  119. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  120. package/host-cp/observability/trace-summary.mjs +267 -0
  121. package/host-cp/src/bootstrap-selective.mjs +58 -0
  122. package/host-cp/src/host-stream.mjs +52 -0
  123. package/host-cp/src/plan-chat-service.mjs +51 -0
  124. package/host-cp/src/redirect.mjs +159 -0
  125. package/host-cp/src/resolver.mjs +121 -0
  126. package/host-cp/src/router.mjs +168 -0
  127. package/host-cp/src/serve-only-config.mjs +85 -0
  128. package/host-cp/src/server.mjs +375 -205
  129. package/host-cp/src/world-services.mjs +136 -0
  130. package/package.json +1 -1
@@ -45,11 +45,14 @@ import {
45
45
  createNdjsonSpanSink,
46
46
  attachBetaResponseEvents,
47
47
  } from '../observability/ndjson-span-sink.mjs';
48
- import { betaResponseEmitter } from '@olam/auth-client';
48
+ import { betaResponseEmitter, cfAccessHeaders } from '@olam/auth-client';
49
49
  import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
50
50
  import { detectHaltChunk } from './halt-detect.mjs';
51
+ import { evaluateRedirect, applyRedirect } from './redirect.mjs';
51
52
  import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
53
+ import { isPlanningPath } from './bootstrap-selective.mjs';
52
54
  import { parseProxyPath, perWorldBase, proxyToWorld } from './proxy.mjs';
55
+ import { fetchWorldServices as fetchWorldServicesImpl } from './world-services.mjs';
53
56
  import { resolveHostCpEngine } from './engine-identity.mjs';
54
57
  import { StartupToken } from './auth.mjs';
55
58
  import { SseGate, isSsePath, wireRelease } from './sse-gate.mjs';
@@ -93,6 +96,7 @@ import {
93
96
  import { instrumentHandler, renderMetrics } from './metrics.mjs';
94
97
  import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
95
98
  import { emitTierSuggestion } from '../dispatch/auto-tier-scheduler.mjs';
99
+ import { isServeOnly, isOrchestrationRoute, ORCHESTRATION_UNAVAILABLE } from './serve-only-config.mjs';
96
100
 
97
101
  // ── Deployment-mode detection ─────────────────────────────────────
98
102
  //
@@ -111,6 +115,17 @@ const HOST_CP_MODE = process.env.OLAM_HOST_CP_MODE
111
115
  ?? (fs.existsSync('/.dockerenv') ? 'container' : 'bare');
112
116
  const WORLD_HOST = HOST_CP_MODE === 'container' ? 'host.docker.internal' : '127.0.0.1';
113
117
 
118
+ // SERVE-ONLY mode (host-cp-gke-serve-only-mode Phase A). When
119
+ // OLAM_HOST_CP_SERVE_ONLY=true, host-cp serves plan-chat-spa + host-native
120
+ // `/api/*` only: NO docker transport connect, NO world discovery, NO
121
+ // PlanOrchestrator docker wiring, NO pr-merge-poller docker/repo deps.
122
+ // World-orchestration routes return a structured 503. Defaults OFF — FULL
123
+ // (local docker/k3d) mode is byte-for-byte unchanged. See
124
+ // ./serve-only-config.mjs for the pure gate decision (unit-tested there;
125
+ // server.mjs can't be imported in a test because it binds a port + connects
126
+ // docker at module load).
127
+ const SERVE_ONLY = isServeOnly(process.env);
128
+
114
129
  // Container-engine identity, surfaced to olam-cli via the X-Olam-Engine
115
130
  // response header on /health. Resolution lives in engine-identity.mjs so
116
131
  // unit tests can import the pure function without triggering server startup.
@@ -230,9 +245,28 @@ async function refreshVersionSnapshot() {
230
245
  }
231
246
  }
232
247
 
233
- // Kick off an initial check immediately, then poll every 60s.
234
- refreshVersionSnapshot();
235
- const versionPollTimer = setInterval(refreshVersionSnapshot, VERSION_POLL_INTERVAL_MS);
248
+ // SERVE-ONLY: the version snapshot polls the operator-repo HEAD + docker
249
+ // image SHAs every 60s — neither exists on a managed cluster (buildVersionSnapshot
250
+ // is fail-soft and would return all-'unknown', but the docker fetches are futile).
251
+ // Seed a static all-'unknown' snapshot so GET /api/version/status returns 200
252
+ // 'unknown' (not 503 pending) and skip the poll. clearInterval(null) is a no-op.
253
+ const UNKNOWN_VERSION_SNAPSHOT = {
254
+ hostCp: { running: process.env.OLAM_BUILD_SHA ?? 'unknown', latest: 'unknown', upgradeAvailable: false },
255
+ authService: { running: 'unknown', latest: 'unknown', upgradeAvailable: false },
256
+ devbox: { running: 'unknown', latest: 'unknown', upgradeAvailable: false },
257
+ operatorHead: 'unknown',
258
+ checkedAt: new Date().toISOString(),
259
+ cliVersion: process.env.OLAM_CLI_VERSION ?? 'unknown',
260
+ };
261
+
262
+ let versionPollTimer = null;
263
+ if (SERVE_ONLY) {
264
+ versionSnapshot = UNKNOWN_VERSION_SNAPSHOT;
265
+ } else {
266
+ // Kick off an initial check immediately, then poll every 60s.
267
+ refreshVersionSnapshot();
268
+ versionPollTimer = setInterval(refreshVersionSnapshot, VERSION_POLL_INTERVAL_MS);
269
+ }
236
270
 
237
271
  // ── World registry — persistent + admin-managed ───────────────────────
238
272
  //
@@ -252,6 +286,39 @@ const REGISTRY_PATH =
252
286
  ? '/data/host-cp-registry.json'
253
287
  : path.join(os.homedir(), '.olam', 'host-cp-registry.json'));
254
288
 
289
+ /**
290
+ * Read the cloud-mode Anthropic proxy URL configured by the operator.
291
+ *
292
+ * Resolution order mirrors packages/adapters/src/shared/anthropic-base-url.ts
293
+ * and packages/auth-client/src/cloud-mode.ts:
294
+ * 1. OLAM_ANTHROPIC_BASE_URL env var
295
+ * 2. ~/.olam/anthropic-base-url file
296
+ * 3. ANTHROPIC_BASE_URL env var
297
+ * 4. '' (empty — skip injection)
298
+ *
299
+ * Called on each plan-creation request (not cached at startup) so operators
300
+ * can update the file without restarting host-cp.
301
+ *
302
+ * @returns {string}
303
+ */
304
+ function readAnthropicBaseUrl() {
305
+ const fromOlamEnv = process.env['OLAM_ANTHROPIC_BASE_URL'];
306
+ if (fromOlamEnv && fromOlamEnv.length > 0) return fromOlamEnv.trim();
307
+
308
+ try {
309
+ const file = path.join(os.homedir(), '.olam', 'anthropic-base-url');
310
+ const content = fs.readFileSync(file, 'utf-8').trim();
311
+ if (content.length > 0) return content;
312
+ } catch {
313
+ // file absent — fall through
314
+ }
315
+
316
+ const fromShellEnv = process.env['ANTHROPIC_BASE_URL'];
317
+ if (fromShellEnv && fromShellEnv.length > 0) return fromShellEnv.trim();
318
+
319
+ return '';
320
+ }
321
+
255
322
  /** @type {Record<string, number>} */
256
323
  let WORLDS = {};
257
324
 
@@ -412,7 +479,11 @@ const prPoller = createPrMergePoller({
412
479
  pollIntervalMs: PR_POLL_INTERVAL_MS,
413
480
  gracePeriodMs: MERGE_GRACE_MS,
414
481
  });
415
- prPoller.start();
482
+ // SERVE-ONLY: pr-merge-poller polls GH for merged PRs then destroys worlds
483
+ // via docker. No docker on a managed cluster — don't start the poll loop.
484
+ // (The poller object is still constructed so the shutdown handler's
485
+ // prPoller.stop() stays a no-op; start() is the docker/repo-touching part.)
486
+ if (!SERVE_ONLY) prPoller.start();
416
487
 
417
488
  // ── Worlds-DB reconcile loop ────────────────────────────────────
418
489
  //
@@ -420,24 +491,31 @@ prPoller.start();
420
491
  // (e.g., host-cp started after `olam create`). This reconciler bridges
421
492
  // that gap: it reads worlds.db and registers any running worlds that
422
493
  // aren't already in WORLDS.
423
- const worldsDbReconciler = startWorldsDbReconciler({
424
- dbPath: WORLDS_DB_PATH,
425
- dockerHost: DOCKER_HOST,
426
- worldHost: WORLD_HOST,
427
- getRegistry: () => WORLDS,
428
- onWorldAdded: (id, port) => {
429
- WORLDS = { ...WORLDS, [id]: port };
430
- persistRegistry();
431
- },
432
- onWorldRemoved: (id) => {
433
- if (id in WORLDS) {
434
- const next = { ...WORLDS };
435
- delete next[id];
436
- WORLDS = next;
437
- persistRegistry();
438
- }
439
- },
440
- });
494
+ //
495
+ // SERVE-ONLY: reconciliation reads worlds.db + probes docker for each
496
+ // world's host port. No worlds.db / docker on a managed cluster — skip it;
497
+ // WORLDS stays empty. `null` sentinel keeps the shutdown handler's
498
+ // `worldsDbReconciler?.stop()` a safe no-op.
499
+ const worldsDbReconciler = SERVE_ONLY
500
+ ? null
501
+ : startWorldsDbReconciler({
502
+ dbPath: WORLDS_DB_PATH,
503
+ dockerHost: DOCKER_HOST,
504
+ worldHost: WORLD_HOST,
505
+ getRegistry: () => WORLDS,
506
+ onWorldAdded: (id, port) => {
507
+ WORLDS = { ...WORLDS, [id]: port };
508
+ persistRegistry();
509
+ },
510
+ onWorldRemoved: (id) => {
511
+ if (id in WORLDS) {
512
+ const next = { ...WORLDS };
513
+ delete next[id];
514
+ WORLDS = next;
515
+ persistRegistry();
516
+ }
517
+ },
518
+ });
441
519
 
442
520
  // ── Plan orchestrator (Phase 1 spike) ─────────────────────────────────────
443
521
  //
@@ -531,7 +609,10 @@ function scheduleServersSnapshot() {
531
609
  }, SERVERS_SNAPSHOT_DEBOUNCE_MS);
532
610
  }
533
611
 
534
- const stopEvents = subscribeDockerEvents({
612
+ // SERVE-ONLY: docker-events subscription opens a long-poll against the
613
+ // docker /events stream — no docker on a managed cluster. Skip the
614
+ // subscription; `stopEvents` is a no-op so the shutdown handler is safe.
615
+ const stopEvents = SERVE_ONLY ? () => {} : subscribeDockerEvents({
535
616
  dockerHost: DOCKER_HOST,
536
617
  onWorldRestart: (worldId) => {
537
618
  cache.invalidate(worldId);
@@ -896,6 +977,18 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
896
977
  });
897
978
  }
898
979
 
980
+ // Phase B3 (plan-chat-spa-supersedes-control-plane): 301 redirect layer.
981
+ // Runs BEFORE static-serve so legacy `/world/:id` catch-all URLs that
982
+ // would otherwise be served as the SPA shell (and then 404 inside the
983
+ // SPA router after Phase B4 deletes the route) get redirected to
984
+ // their canonical successor. Allow-listed; closed set; security
985
+ // gated against SEC-2 (no caller-controlled Location, regex-validated
986
+ // ids, hardcoded prefixes). See packages/host-cp/src/redirect.mjs.
987
+ if (req.method === 'GET' || req.method === 'HEAD') {
988
+ const redirectVerdict = evaluateRedirect(url.pathname);
989
+ if (applyRedirect(res, redirectVerdict)) return;
990
+ }
991
+
899
992
  // Phase F-2-D dogfood fix: serve the SPA dist/ for non-API GET requests
900
993
  // BEFORE auth gate. The SPA itself is the auth gate — it loads, fetches
901
994
  // /api/bootstrap (unauthed), sets the cookie, then makes authed API calls.
@@ -913,6 +1006,18 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
913
1006
  // Anything that doesn't match a static file falls through to the auth
914
1007
  // gate + 404 below (preserves the JSON-error contract for unknown
915
1008
  // /api/* paths).
1009
+ //
1010
+ // Phase A serve-only: world-ORCHESTRATION routes degrade to a structured
1011
+ // 503 BEFORE static-serve. This must run pre-static so (a) a GET
1012
+ // /v1/worlds/<id>/status can't be served the SPA HTML shell, and (b) a
1013
+ // POST /api/worlds/<id>/tunnels / DELETE /api/worlds/<id> mutation can't
1014
+ // execute (no docker on a managed cluster; honest degradation, not a
1015
+ // hollow shell). Method-aware: bare GET /api/worlds (list) is NOT blocked
1016
+ // (returns []). No-op in full mode (SERVE_ONLY false). See CP3 finding.
1017
+ if (SERVE_ONLY && isOrchestrationRoute(url.pathname, req.method)) {
1018
+ return jsonReply(res, 503, ORCHESTRATION_UNAVAILABLE);
1019
+ }
1020
+
916
1021
  if (req.method === 'GET' || req.method === 'HEAD') {
917
1022
  const served = await tryServeStatic(req, res, url.pathname);
918
1023
  if (served) return;
@@ -1676,6 +1781,13 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
1676
1781
  }
1677
1782
  }
1678
1783
 
1784
+ // SERVE-ONLY: every `/api/world/<id>/...` route is world orchestration —
1785
+ // it needs docker (proxy to a per-world CP, ttyd, secret fetch, progress
1786
+ // probe). On a managed cluster there's no docker + WORLDS is empty, so all of them get the structured 503.
1787
+ // (serve-only world-orchestration 503 is handled earlier, pre-static, by
1788
+ // the isOrchestrationRoute guard — it covers /api/world/, /api/worlds/<id>,
1789
+ // and /v1/worlds/ for all methods, so no per-route guard is needed here.)
1790
+
1679
1791
  // GET /api/world/<id>/progress — phase ladder progress for inbox row.
1680
1792
  const progressMatch = /^\/api\/world\/([^/?#]+)\/progress\/?$/.exec(url.pathname);
1681
1793
  if (progressMatch && req.method === 'GET') {
@@ -2360,6 +2472,23 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2360
2472
  // current SPA model; A11 vault-sync can refine the mapping).
2361
2473
  const planId = parsed.session_id ?? 'default';
2362
2474
  const basicAuth = Buffer.from(`operator:${showcasePw}`).toString('base64');
2475
+
2476
+ // Gap 3: enrich the dispatch body with the operator's anthropicBaseUrl
2477
+ // so plan-DO can propagate it to spawned CF Sandbox child worlds.
2478
+ // Only injected when not already set by the SPA (SPA has no auth-worker
2479
+ // config knowledge — host-cp is the sole injection point).
2480
+ const anthropicBaseUrl = readAnthropicBaseUrl();
2481
+ const enriched = anthropicBaseUrl && !parsed.anthropicBaseUrl
2482
+ ? JSON.stringify({ ...parsed, anthropicBaseUrl })
2483
+ : body;
2484
+
2485
+ // Phase H h2: attach CF Access service-token headers when configured
2486
+ // (machine-to-machine auth). Additive alongside Basic auth. CF Access
2487
+ // headers are validated at the EDGE of origins behind a CF Access app
2488
+ // (e.g. auth-worker.kaluga.co). They are inert on same-account service-
2489
+ // binding hops (plan-DO) because those bypass the CF Access edge; a CF
2490
+ // Access app in front of plan-DO would still not receive service-binding
2491
+ // traffic. See docs/runbooks/cf-access-service-token.md.
2363
2492
  const upstream = await fetch(
2364
2493
  `${cloudUrl.replace(/\/+$/, '')}/v1/dispatch?plan_id=${encodeURIComponent(planId)}`,
2365
2494
  {
@@ -2367,8 +2496,9 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2367
2496
  headers: {
2368
2497
  'Authorization': `Basic ${basicAuth}`,
2369
2498
  'content-type': 'application/json',
2499
+ ...cfAccessHeaders(),
2370
2500
  },
2371
- body,
2501
+ body: enriched,
2372
2502
  },
2373
2503
  );
2374
2504
  const upstreamBody = await upstream.text();
@@ -2384,6 +2514,72 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2384
2514
  }
2385
2515
  }
2386
2516
 
2517
+ // /api/plans/create — Gap 3 plan-creation handshake (Phase H h2 v1 dogfood).
2518
+ //
2519
+ // Accepts a plan-creation request from the SPA, enriches it with the
2520
+ // operator's anthropicBaseUrl (resolved by readAnthropicBaseUrl(): env vars, then ~/.olam/anthropic-base-url), and forwards
2521
+ // it to plan-DO's /v1/plans/create so plan-DO stores the bearer URL for
2522
+ // subsequent dispatches + spawned CF Sandbox child worlds.
2523
+ //
2524
+ // Config:
2525
+ // OLAM_CLOUD_URL — plan-DO deployed URL (e.g. https://plan-agent-do.workers.dev)
2526
+ // OLAM_SHOWCASE_PASSWORD — showcase Basic auth password
2527
+ //
2528
+ // Returns 503 when cloud is not configured — operators using local Docker
2529
+ // mode skip this; the SPA treats 503 as a non-fatal degraded state.
2530
+ if (url.pathname === '/api/plans/create' && req.method === 'POST') {
2531
+ const cloudUrl = process.env.OLAM_CLOUD_URL;
2532
+ const showcasePw = process.env.OLAM_SHOWCASE_PASSWORD;
2533
+ if (!cloudUrl || !showcasePw) {
2534
+ return jsonReply(res, 503, {
2535
+ error: 'cloud_not_configured',
2536
+ message: 'OLAM_CLOUD_URL + OLAM_SHOWCASE_PASSWORD not set; plan-DO bearer propagation skipped.',
2537
+ });
2538
+ }
2539
+ try {
2540
+ const reqChunks = [];
2541
+ for await (const c of req) reqChunks.push(c);
2542
+ let parsed = {};
2543
+ try { parsed = JSON.parse(Buffer.concat(reqChunks).toString('utf8') || '{}'); } catch {
2544
+ // Non-fatal: body is optional — caller may POST with no body to trigger
2545
+ // bearer registration without additional plan metadata.
2546
+ }
2547
+
2548
+ // Enrich with anthropicBaseUrl from the host config.
2549
+ const anthropicBaseUrl = readAnthropicBaseUrl();
2550
+ const planId = parsed.planId ?? parsed.session_id ?? `plan-${Date.now()}`;
2551
+ const requestBody = { ...parsed, planId, ...(anthropicBaseUrl ? { anthropicBaseUrl } : {}) };
2552
+
2553
+ const basicAuth = Buffer.from(`operator:${showcasePw}`).toString('base64');
2554
+ // Phase H h2: attach CF Access service-token headers when configured.
2555
+ // See the /api/cloud-dispatch handler above + the runbook for why these
2556
+ // are additive (kept alongside Basic) and edge-validated only on origins
2557
+ // behind a CF Access app — inert on same-account service-binding hops.
2558
+ const upstream = await fetch(
2559
+ `${cloudUrl.replace(/\/+$/, '')}/v1/plans/create?plan_id=${encodeURIComponent(planId)}`,
2560
+ {
2561
+ method: 'POST',
2562
+ headers: {
2563
+ 'Authorization': `Basic ${basicAuth}`,
2564
+ 'content-type': 'application/json',
2565
+ ...cfAccessHeaders(),
2566
+ },
2567
+ body: JSON.stringify(requestBody),
2568
+ },
2569
+ );
2570
+ const upstreamBody = await upstream.text();
2571
+ res.statusCode = upstream.status;
2572
+ res.setHeader('content-type', upstream.headers.get('content-type') ?? 'application/json');
2573
+ res.setHeader('cache-control', 'no-store');
2574
+ return res.end(upstreamBody);
2575
+ } catch (err) {
2576
+ return jsonReply(res, 502, {
2577
+ error: 'plans_create_proxy_failed',
2578
+ message: err.message,
2579
+ });
2580
+ }
2581
+ }
2582
+
2387
2583
  // GET /api/worlds/:id/processes
2388
2584
  // GET /api/worlds/:id/processes/stream — SSE fanout (5s cadence, per-world)
2389
2585
  // Handler: routes/process-port.mjs → handleListProcesses
@@ -2781,148 +2977,54 @@ function handleAuthEvents(req, res) {
2781
2977
  //
2782
2978
  // Fetch port bindings for a world's container via docker-socket-proxy
2783
2979
  // inspect. Returns [{name, host_port, internal_port, url}] tagged with
2784
- // well-known internal ports.
2785
-
2786
- const WELL_KNOWN_PORTS = {
2787
- 3000: 'atlas-core (Rails)',
2788
- 5175: 'diner-app (Vite)',
2789
- 7681: 'Terminal (ttyd)',
2790
- 7682: 'Terminal Shell (ttyd)',
2791
- 8080: 'Per-world CP',
2792
- };
2793
-
2794
- /**
2795
- * Quick liveness probe against a service URL. Returns true if the
2796
- * service responds with ANY HTTP response (1xx-5xx) — we don't care
2797
- * about status codes because each app has its own conventions (Vite
2798
- * 200s on /, ttyd may 401, Rails may 500 on /, the per-world CP 200s).
2799
- * What matters is that something is listening.
2800
- *
2801
- * Probed from inside the host-cp container so we use HOST_FOR_WORLD
2802
- * (host.docker.internal on macOS/Windows, 172.17.0.1 on Linux) — the
2803
- * SPA's own 127.0.0.1:<port> URL is unreachable from container-side.
2804
- *
2805
- * Tight 800ms timeout. Worst case: 4 services × 800ms in parallel ≤ 1s
2806
- * added to the /api/worlds response — acceptable for a 4s poll cycle.
2807
- */
2808
- async function probeServiceLive(hostPort) {
2809
- const probeUrl = `http://${HOST_FOR_WORLD}:${hostPort}/`;
2810
- try {
2811
- const res = await fetch(probeUrl, {
2812
- method: 'HEAD',
2813
- signal: AbortSignal.timeout(800),
2814
- redirect: 'manual',
2815
- });
2816
- return res.status > 0;
2817
- } catch {
2818
- // ECONNREFUSED, timeout, DNS — anything counts as not-live. Try
2819
- // GET as a fallback because some servers (e.g. ttyd) close on HEAD
2820
- // and we don't want false negatives from picky upstream behavior.
2821
- try {
2822
- const res2 = await fetch(probeUrl, {
2823
- method: 'GET',
2824
- signal: AbortSignal.timeout(800),
2825
- redirect: 'manual',
2826
- });
2827
- return res2.status > 0;
2828
- } catch {
2829
- return false;
2830
- }
2831
- }
2832
- }
2833
-
2834
- /**
2835
- * Get the running container's port bindings from socket-proxy + map
2836
- * each to a clickable URL. Each service is then probed in parallel
2837
- * for actual reachability — the docker port mapping just tells us
2838
- * what's CONFIGURED; the probe confirms what's actually LISTENING.
2839
- *
2840
- * Returns [] on any docker-inspect failure (container missing, socket-
2841
- * proxy down) so the API still returns a valid worlds list.
2842
- *
2843
- * @param {string} worldId
2844
- * @returns {Promise<Array<{name: string, host_port: number, internal_port: number, url: string, live: boolean}>>}
2845
- */
2846
- async function fetchWorldServices(worldId) {
2847
- const containerName = `olam-${worldId}-devbox`;
2848
- let data;
2849
- try {
2850
- if (DOCKER_HOST === 'docker-cli') {
2851
- // Bare-node mode: shell out to `docker inspect` instead of HTTP.
2852
- // Same fix pattern as fetchContainerSecret (PR #108). Without
2853
- // this, the services array is always empty in bare-node and the
2854
- // SPA can't find the ttyd host port → terminal renders blank.
2855
- const { spawnSync } = await import('node:child_process');
2856
- const result = spawnSync(
2857
- 'docker',
2858
- ['inspect', containerName],
2859
- { encoding: 'utf-8', timeout: 2000 },
2860
- );
2861
- if (result.status !== 0) return [];
2862
- const arr = JSON.parse(result.stdout || '[]');
2863
- data = Array.isArray(arr) && arr.length > 0 ? arr[0] : null;
2864
- if (!data) return [];
2865
- } else {
2866
- const apiBase = DOCKER_HOST.replace(/^tcp:\/\//, 'http://');
2867
- const res = await fetch(`${apiBase}/containers/${encodeURIComponent(containerName)}/json`, {
2868
- signal: AbortSignal.timeout(2000),
2869
- });
2870
- if (!res.ok) return [];
2871
- data = await res.json();
2872
- }
2873
- const ports = data?.NetworkSettings?.Ports ?? {};
2874
- const draft = [];
2875
- for (const [internal, bindings] of Object.entries(ports)) {
2876
- if (!Array.isArray(bindings) || bindings.length === 0) continue;
2877
- const internalPort = parseInt(internal.split('/')[0], 10);
2878
- const hostPort = parseInt(bindings[0].HostPort, 10);
2879
- if (!Number.isFinite(internalPort) || !Number.isFinite(hostPort)) continue;
2880
- draft.push({
2881
- name: WELL_KNOWN_PORTS[internalPort] ?? `App (port ${internalPort})`,
2882
- host_port: hostPort,
2883
- internal_port: internalPort,
2884
- url: `http://127.0.0.1:${hostPort}`,
2885
- });
2886
- }
2887
-
2888
- // Probe each service in parallel for actual reachability. Adds a
2889
- // `live: boolean` field. The UI dims chips for non-live services
2890
- // so operators can see what's configured-but-down vs configured-
2891
- // and-up at a glance.
2892
- const liveResults = await Promise.all(
2893
- draft.map((s) => probeServiceLive(s.host_port)),
2894
- );
2895
- const services = draft.map((s, i) => ({ ...s, live: liveResults[i] }));
2896
-
2897
- // Stable order: well-known ports first (CP, then Rails/Vite, then terminal).
2898
- services.sort((a, b) => a.internal_port - b.internal_port);
2899
- return services;
2900
- } catch {
2901
- return [];
2902
- }
2980
+ // well-known internal ports. The probe + enrichment logic lives in
2981
+ // ./world-services.mjs (extracted for isolated unit testing); this thin
2982
+ // wrapper binds the host-specific HOST_FOR_WORLD / DOCKER_HOST module
2983
+ // constants so callers keep the single-arg `fetchWorldServices(worldId)`
2984
+ // signature (used at the createLocalWorldsSource wiring above).
2985
+ function fetchWorldServices(worldId) {
2986
+ return fetchWorldServicesImpl(worldId, {
2987
+ hostForWorld: HOST_FOR_WORLD,
2988
+ dockerHost: DOCKER_HOST,
2989
+ });
2903
2990
  }
2904
2991
 
2905
2992
  // ── Static file serving (Phase F-2-D dogfood fix) ──────────────────
2906
2993
  //
2907
- // SPA dist/ is at /app/dist/ inside the container (see Dockerfile).
2908
- // In bare-node mode, the SPA build lives in packages/control-plane/public
2909
- // (where the workspace's `npm run build` writes it). The legacy
2910
- // packages/host-cp/dist used to be hand-tarballed but can drift out of
2911
- // sync with the index.html→bundle hash mapping; prefer public/ when it
2912
- // exists so a stale dist doesn't 404 on /assets/<hash>.js.
2913
-
2994
+ // SPA dist/ is at /app/dist/ inside the container (see Dockerfile; the
2995
+ // build stages plan-chat-spa's dist/client there as of Phase E5).
2996
+ // In bare-node mode, the SPA build lives in
2997
+ // packages/plan-chat-spa/dist/client (where `vite build` writes it as of
2998
+ // the Phase E5 ATOMIC SERVING CUTOVER — plan-chat-spa supersedes
2999
+ // control-plane as host-cp's sole served SPA). The legacy
3000
+ // control-plane/public candidates are no longer in the candidate list
3001
+ // below, so a host-cp running against a worktree with no plan-chat-spa
3002
+ // build finds no SPA at all rather than silently serving the old
3003
+ // control-plane shell. The legacy packages/host-cp/dist used to be hand-tarballed
3004
+ // but can drift out of sync with the index.html→bundle hash mapping;
3005
+ // prefer the freshly-built dist/client when it exists.
3006
+
3007
+ // Phase E5 (ATOMIC SERVING CUTOVER) — FAIL-CLOSED candidate list.
3008
+ // Every candidate now resolves to a plan-chat-spa build. The retired
3009
+ // control-plane/public candidates were REMOVED (per /codex:rescue on the
3010
+ // cutover): keeping them meant a missing/stale plan-chat-spa build would
3011
+ // silently fall back to serving the OLD control-plane shell and look
3012
+ // superficially healthy. Now an absent plan-chat-spa dist serves nothing
3013
+ // (ENOENT → SPA-shell 404) — a loud failure, not a silent wrong-SPA serve.
3014
+ // - /app/dist — container (Dockerfile stages plan-chat-spa here)
3015
+ // - packages/plan-chat-spa/dist/client — bare-node local (vite build output)
3016
+ // - packages/host-cp/dist — stage-host-cp-spa.sh output (also plan-chat-spa)
2914
3017
  const DIST_DIR = (() => {
2915
3018
  const candidates = [
2916
3019
  '/app/dist',
2917
- path.resolve(process.cwd(), 'packages/control-plane/public'),
2918
- path.resolve(process.cwd(), '../control-plane/public'),
2919
- path.resolve(process.cwd(), 'dist'),
3020
+ path.resolve(process.cwd(), 'packages/plan-chat-spa/dist/client'),
3021
+ path.resolve(process.cwd(), '../plan-chat-spa/dist/client'),
2920
3022
  path.resolve(process.cwd(), 'packages/host-cp/dist'),
2921
3023
  ];
2922
3024
  for (const c of candidates) {
2923
3025
  if (fs.existsSync(c) && fs.existsSync(path.join(c, 'index.html'))) return c;
2924
3026
  }
2925
- return '/app/dist'; // fallback; readFile will surface ENOENT
3027
+ return '/app/dist'; // fallback; readFile will surface ENOENT (fail-closed)
2926
3028
  })();
2927
3029
 
2928
3030
  const SPA_ROUTES = new Set(['/', '/worlds', '/workspaces', '/inbox', '/repos', '/runbooks', '/plan']);
@@ -3032,7 +3134,7 @@ async function tryServeStatic(req, res, pathname) {
3032
3134
  // Without this the SPA loads but every fetch 401s and the operator
3033
3135
  // sees "Could not load worlds — HTTP 401".
3034
3136
  if (isSpaShell) {
3035
- const html = await renderSpaShell(filePath);
3137
+ const html = await renderSpaShell(filePath, pathname);
3036
3138
  res.writeHead(200, {
3037
3139
  'Content-Type': 'text/html; charset=utf-8',
3038
3140
  'Cache-Control': 'no-cache, no-store, must-revalidate',
@@ -3062,11 +3164,12 @@ async function tryServeStatic(req, res, pathname) {
3062
3164
  return true;
3063
3165
  }
3064
3166
 
3065
- // Memoized injected SPA shell. Read once at first request; serve from
3066
- // memory thereafter. Cache invalidates on dist/ mtime change so a
3067
- // rebuilt bundle is picked up without restart.
3068
- let _spaCache = null;
3069
- let _spaCacheKey = '';
3167
+ // Memoized injected SPA shells. Read once per (mtime, bearer-len)
3168
+ // cache key; serve from memory thereafter. Cache invalidates
3169
+ // on dist/ mtime change so a rebuilt bundle is picked up without
3170
+ // restart. Phase D1 — keyed map (not single slot) so /plan and
3171
+ // /workspaces don't trash each other's cached HTML.
3172
+ const _spaCacheByKey = new Map();
3070
3173
 
3071
3174
  /**
3072
3175
  * Bootstrap script injected into the SPA shell. Two responsibilities:
@@ -3144,6 +3247,19 @@ let _spaCacheKey = '';
3144
3247
  // and the token-comparison check skips reload when the cookie
3145
3248
  // already matches (so non-rotation 401s — e.g. genuine auth
3146
3249
  // failures — don't cause a refresh loop).
3250
+ // Phase E5 (ATOMIC SERVING CUTOVER): BOOTSTRAP_SCRIPT is NO LONGER
3251
+ // injected into the served SPA shell. host-cp now serves plan-chat-spa
3252
+ // exclusively, whose bundle re-homes the cookie-bootstrap +
3253
+ // world-fetch-rewrite + 401-recover shim (packages/plan-chat-spa/src/lib/
3254
+ // worldFetch.ts, installed at the top of src/main.tsx — Phase C). The
3255
+ // const is RETAINED, defined-but-unreferenced-in-render, because
3256
+ // scripts/audit-worker-bootstrap-parity.mjs extracts the `HN` (and `WP`)
3257
+ // arrays out of this literal via extractHN()/extractWP() and machine-gates
3258
+ // them byte-equal against worldFetch.ts's HOST_NATIVE_PREFIXES /
3259
+ // WORLD_PREFIXES. Deleting this const would make extractHN return null →
3260
+ // audit FAIL. Keep it as the canonical HN/WP-array parity source until
3261
+ // that audit is repointed at worldFetch.ts directly (follow-up).
3262
+ // eslint-disable-next-line no-unused-vars -- retained as HN/WP parity-audit source
3147
3263
  const BOOTSTRAP_SCRIPT = `<script>(function(){function ck(){var m=document.cookie.match(/olam_host_cp_token=([^;]+)/);return m?m[1]:'';}function sw(t){document.cookie='olam_host_cp_token='+t+'; path=/; samesite=strict';}try{var x=new XMLHttpRequest();x.open('GET','/api/bootstrap',false);x.send();if(x.status===200){var d=JSON.parse(x.responseText);sw(d.token);}}catch(e){console.error('[host-cp bootstrap]',e);}var reloading=false;function recover(){if(reloading)return;try{var x=new XMLHttpRequest();x.open('GET','/api/bootstrap',false);x.send();if(x.status===200){var d=JSON.parse(x.responseText);if(d.token&&ck()!==d.token){reloading=true;sw(d.token);console.warn('[host-cp auth recover] token rotated; reloading');location.reload();}}}catch(e){console.error('[host-cp auth recover]',e);}}var HN=['/api/bootstrap','/api/worlds','/api/projects','/api/workspaces','/api/workspaces/match','/api/repos','/api/runbooks','/api/auth','/api/host-stream','/api/plan-chat','/api/plan/agent-runtime','/health'];var WP=['/api/','/session/','/hooks/','/dispatch','/lanes','/codex/','/review/'];function sr(p){if(typeof p!=='string')return false;if(p.startsWith('/api/world/'))return false;for(var i=0;i<HN.length;i++){var n=HN[i];if(p===n||p.startsWith(n+'?')||p.startsWith(n+'/'))return false;}for(var j=0;j<WP.length;j++){var w=WP[j];if(p===w||p===w.replace(/\\/$/,'')||p.startsWith(w)||p.startsWith(w.replace(/\\/$/,'')+'?')||p.startsWith(w.replace(/\\/$/,'')+'/'))return true;}return false;}function wid(){var p=location.pathname;var m=p.match(/^\\/(world|inbox|session)\\/([^/?#]+)/);if(m)return m[2];if(/^\\/(?:worlds?|workspaces?|world|sandbox|session|inbox|plan|design|repos|runbooks|assets|api|health|favicon)($|\\/|\\?)/.test(p))return null;var r=p.match(/^\\/([a-z][a-z0-9-]+)(?:\\/|$|\\?)/);return r?r[1]:null;}function rw(p){var w=wid();return w?'/api/world/'+w+p:p;}var of=window.fetch.bind(window);window.fetch=function(input,init){var pr;if(typeof 
input==='string'&&sr(input))pr=of(rw(input),init);else if(input&&typeof input.url==='string'&&sr(input.url))pr=of(new Request(rw(input.url),input),init);else pr=of(input,init);return pr.then(function(res){if(res&&res.status===401)recover();return res;});};var OE=window.EventSource;if(OE){window.EventSource=function(u,i){var s=u;if(typeof s==='string'&&sr(s))s=rw(s);var es=new OE(s,i);es.addEventListener('error',function(){if(es.readyState===OE.CLOSED)recover();});return es;};window.EventSource.prototype=OE.prototype;window.EventSource.CONNECTING=OE.CONNECTING;window.EventSource.OPEN=OE.OPEN;window.EventSource.CLOSED=OE.CLOSED;}})();</script>`;
3148
3264
 
3149
3265
  /**
@@ -3166,14 +3282,42 @@ function buildPlanChatBearerInjection() {
3166
3282
  }
3167
3283
  }
3168
3284
 
3169
- async function renderSpaShell(filePath) {
3285
+ // Phase E5 (ATOMIC SERVING CUTOVER) — BOOTSTRAP_SCRIPT no longer injected.
3286
+ //
3287
+ // host-cp now serves plan-chat-spa exclusively. plan-chat-spa's own
3288
+ // bundle re-homes BOTH auth paths:
3289
+ // - readBearer() (lib/bearer.ts) reads window.__OLAM_PLAN_CHAT_BEARER__
3290
+ // injected inline below (the `bearerInjection`) OR falls back to the
3291
+ // URL-hash channel.
3292
+ // - the cookie-bootstrap + world-fetch-rewrite + 401-recover shim
3293
+ // (lib/worldFetch.ts, installed at the top of src/main.tsx) handles
3294
+ // the cookie + path-rewrite duties that host-cp's BOOTSTRAP_SCRIPT
3295
+ // used to perform.
3296
+ // So the served shell injects ONLY the bearer; the bootstrap shim is
3297
+ // dropped. isPlanningPath() (bootstrap-selective.mjs) is now a wildcard
3298
+ // (true for every string path) — this function relies on that to never
3299
+ // inject BOOTSTRAP_SCRIPT.
3300
+ //
3301
+ // Reversal: re-narrow isPlanningPath() in bootstrap-selective.mjs (see
3302
+ // the revert-seam note there) and restore the `bootstrapPart` branch
3303
+ // below if a surface ever needs host-cp's bootstrap again.
3304
+ //
3305
+ // Per K1 SCP-3 + phase-d-tasks D1 + phase-e-tasks E2 acceptance.
3306
+
3307
+ async function renderSpaShell(filePath, pathname) {
3170
3308
  const stat = fs.statSync(filePath);
3171
3309
  const bearerInjection = buildPlanChatBearerInjection();
3172
- // Cache key must include the bearer so rotation invalidates correctly.
3310
+ // Phase E5: BOOTSTRAP_SCRIPT is never injected — plan-chat-spa's own
3311
+ // worldFetch.ts shim owns the cookie-bootstrap + path-rewrite contract.
3312
+ // We still evaluate isPlanningPath() to document the wildcard invariant,
3313
+ // but the result is only `void`-ed below — nothing here checks it, so a
3314
+ // future re-narrowing would NOT surface at runtime on its own.
3315
+ const skipBootstrap = isPlanningPath(pathname);
3316
+ // Cache key includes bearer length (the only per-render-varying input
3317
+ // now that bootstrap injection is constant-empty).
3173
3318
  const cacheKey = stat.mtimeMs + ':' + bearerInjection.length;
3174
- if (_spaCache !== null && _spaCacheKey === cacheKey) {
3175
- return _spaCache;
3176
- }
3319
+ const cached = _spaCacheByKey.get(cacheKey);
3320
+ if (cached !== undefined) return cached;
3177
3321
  let html = fs.readFileSync(filePath, 'utf-8');
3178
3322
  // Vite emits relative asset paths (`./assets/...`) so the SPA bundle
3179
3323
  // is portable across deploy paths. But under host-cp's path-segment
@@ -3181,13 +3325,12 @@ async function renderSpaShell(filePath) {
3181
3325
  // which 404s. Rewrite to absolute `/assets/` so all SPA shell paths
3182
3326
  // (/, /worlds, /workspaces, /world/<id>) reference the same bundle.
3183
3327
  html = html.replace(/(href|src)="\.\/assets\//g, '$1="/assets/');
3184
- // Inject right after <head> so the bootstrap runs before any other
3185
- // script tag on the page. Bearer injection runs after the host-cp
3186
- // bootstrap so window.__OLAM_PLAN_CHAT_BEARER__ is set before the
3187
- // SPA bundle reads it.
3188
- html = html.replace(/<head>/i, `<head>\n ${BOOTSTRAP_SCRIPT}\n ${bearerInjection}`);
3189
- _spaCache = html;
3190
- _spaCacheKey = cacheKey;
3328
+ // Inject the bearer right after <head> so window.__OLAM_PLAN_CHAT_BEARER__
3329
+ // is set before the SPA bundle reads it. No bootstrap shim — see the
3330
+ // block comment above (Phase E5 cutover).
3331
+ void skipBootstrap; // wildcard invariant: always true; documents intent
3332
+ html = html.replace(/<head>/i, `<head>\n ${bearerInjection}`);
3333
+ _spaCacheByKey.set(cacheKey, html);
3191
3334
  return html;
3192
3335
  }
3193
3336
 
@@ -3262,28 +3405,41 @@ server.on('upgrade', (req, clientSocket, head) => {
3262
3405
  }
3263
3406
  });
3264
3407
 
3265
- // Probe persisted tunnels on startup; mark unreachable ones stale.
3266
- tunnelManager.probeAllOnStartup().catch((err) => {
3267
- console.error(`tunnel startup probe failed: ${err.message}`);
3268
- });
3408
+ // SERVE-ONLY: everything below this point through reconcileWorldsWithDocker
3409
+ // is world-orchestration observability — tunnel probes, the worlds.db /
3410
+ // docker snapshot loops, the per-world activity tracker, and the boot-time
3411
+ // reconcile. None of it has a docker daemon / worlds.db / world tunnels on a
3412
+ // managed cluster. Skip it all in serve-only; the snapshot timers + tracker
3413
+ // stay unstarted so the shutdown handler's `?.`-guarded stops are safe.
3414
+ if (!SERVE_ONLY) {
3415
+ // Probe persisted tunnels on startup; mark unreachable ones stale.
3416
+ tunnelManager.probeAllOnStartup().catch((err) => {
3417
+ console.error(`tunnel startup probe failed: ${err.message}`);
3418
+ });
3269
3419
 
3270
- // Start the 1-Hz worlds.db hash-diff loop after the server boots so
3271
- // the initial broadcast happens once the route is reachable.
3272
- startWorldsSnapshotLoop();
3273
- // Phase B-bonus: start tunnel + listening snapshot loops. Both
3274
- // hash-debounce so idle windows produce zero broadcasts.
3275
- startTunnelsSnapshotLoop();
3276
- startListeningSnapshotLoop();
3420
+ // Start the 1-Hz worlds.db hash-diff loop after the server boots so
3421
+ // the initial broadcast happens once the route is reachable.
3422
+ startWorldsSnapshotLoop();
3423
+ // Phase B-bonus: start tunnel + listening snapshot loops. Both
3424
+ // hash-debounce so idle windows produce zero broadcasts.
3425
+ startTunnelsSnapshotLoop();
3426
+ startListeningSnapshotLoop();
3427
+ }
3277
3428
 
3278
3429
  // Closes #965: live thought_count + total_cost_usd updates from each
3279
3430
  // active world's Claude session JSONL. Periodic (60s default) so Rico's
3280
3431
  // scheduling loop can read fresh values from the `worlds` table and
3281
3432
  // SPAs can subscribe to the `world.activity.tick` event. Fail-soft per
3282
3433
  // world: missing/malformed JSONL never crashes the loop.
3283
- const worldActivityTracker = startWorldActivityTracker({
3284
- dbPath: WORLDS_DB_PATH,
3285
- broadcaster: hostStream,
3286
- });
3434
+ //
3435
+ // SERVE-ONLY: reads worlds.db (absent on a managed cluster). `null` sentinel
3436
+ // keeps the shutdown handler's `worldActivityTracker?.stop()` a no-op.
3437
+ const worldActivityTracker = SERVE_ONLY
3438
+ ? null
3439
+ : startWorldActivityTracker({
3440
+ dbPath: WORLDS_DB_PATH,
3441
+ broadcaster: hostStream,
3442
+ });
3287
3443
 
3288
3444
  // ── Phase 1a / B1 (PR3): engine-select + await-before-listen ─────
3289
3445
  //
@@ -3302,14 +3458,20 @@ const worldActivityTracker = startWorldActivityTracker({
3302
3458
  // resolve through the same async branch for symmetry — the call-site
3303
3459
  // migration to engine.* methods is a downstream task; today the engine
3304
3460
  // instance is held for /health diagnostic + future use.
3305
- const hostCpEngine = await (async () => {
3306
- if (HOST_CP_ENGINE === 'kubernetes') {
3307
- const { createKubernetesEngine } = await import('./engines/kubernetes.mjs');
3308
- return createKubernetesEngine({ env: process.env });
3309
- }
3310
- const { createDockerEngine } = await import('./engines/docker.mjs');
3311
- return createDockerEngine({ dockerHost: DOCKER_HOST });
3312
- })();
3461
+ // SERVE-ONLY: don't resolve a real container engine — there's no docker
3462
+ // daemon to talk to and the KubernetesEngine factory runs a context-
3463
+ // allowlist guard that has no managed-cluster meaning here. Use a minimal
3464
+ // inert engine descriptor so /health still reports an engine name.
3465
+ const hostCpEngine = SERVE_ONLY
3466
+ ? { engineName: 'serve-only', context: undefined }
3467
+ : await (async () => {
3468
+ if (HOST_CP_ENGINE === 'kubernetes') {
3469
+ const { createKubernetesEngine } = await import('./engines/kubernetes.mjs');
3470
+ return createKubernetesEngine({ env: process.env });
3471
+ }
3472
+ const { createDockerEngine } = await import('./engines/docker.mjs');
3473
+ return createDockerEngine({ dockerHost: DOCKER_HOST });
3474
+ })();
3313
3475
 
3314
3476
  // ── Boot-time worlds.db ↔ docker reconciler (issue #963) ─────────────
3315
3477
  //
@@ -3318,17 +3480,24 @@ const hostCpEngine = await (async () => {
3318
3480
  // world is running/active but the container is gone, mark it 'orphaned'.
3319
3481
  // Fail-soft: docker unreachable or DB unavailable → log + continue boot.
3320
3482
  // Runs BEFORE server.listen() so the first request sees reconciled state.
3321
- try {
3322
- await reconcileWorldsWithDocker({
3323
- dbPath: WORLDS_DB_PATH,
3324
- listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
3325
- });
3326
- } catch (err) {
3327
- console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
3483
+ //
3484
+ // SERVE-ONLY: no worlds.db / docker container list on a managed cluster.
3485
+ if (!SERVE_ONLY) {
3486
+ try {
3487
+ await reconcileWorldsWithDocker({
3488
+ dbPath: WORLDS_DB_PATH,
3489
+ listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
3490
+ });
3491
+ } catch (err) {
3492
+ console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
3493
+ }
3328
3494
  }
3329
3495
 
3330
3496
  server.listen(PORT, '0.0.0.0', () => {
3331
3497
  console.log(`olam-host-cp B3 listening on :${PORT}`);
3498
+ if (SERVE_ONLY) {
3499
+ console.log(' [serve-only] OLAM_HOST_CP_SERVE_ONLY=true — SPA + host-native /api/* only; world orchestration disabled (/api/world/* → 503 orchestration_unavailable).');
3500
+ }
3332
3501
  console.log(` DOCKER_HOST=${DOCKER_HOST}`);
3333
3502
  console.log(` cache TTL=${TTL_SEC}s`);
3334
3503
  console.log(` worlds known: ${Object.keys(WORLDS).join(', ') || '(none)'}`);
@@ -3363,11 +3532,12 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
3363
3532
  console.log(`received ${sig}, shutting down`);
3364
3533
  stopEvents();
3365
3534
  prPoller.stop();
3366
- worldsDbReconciler.stop();
3535
+ // worldsDbReconciler + worldActivityTracker are null in SERVE-ONLY mode.
3536
+ worldsDbReconciler?.stop();
3367
3537
  stopWorldsSnapshotLoop();
3368
3538
  stopTunnelsSnapshotLoop();
3369
3539
  stopListeningSnapshotLoop();
3370
- worldActivityTracker.stop();
3540
+ worldActivityTracker?.stop();
3371
3541
  if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
3372
3542
  hostStream.close();
3373
3543
  if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});