@pleri/olam-cli 0.1.180 → 0.1.182

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/dist/commands/auth.d.ts.map +1 -1
  2. package/dist/commands/auth.js +67 -19
  3. package/dist/commands/auth.js.map +1 -1
  4. package/dist/commands/config.d.ts.map +1 -1
  5. package/dist/commands/config.js +93 -0
  6. package/dist/commands/config.js.map +1 -1
  7. package/dist/commands/destroy.d.ts +41 -0
  8. package/dist/commands/destroy.d.ts.map +1 -1
  9. package/dist/commands/destroy.js +81 -33
  10. package/dist/commands/destroy.js.map +1 -1
  11. package/dist/commands/dispatch-resolve.d.ts +54 -0
  12. package/dist/commands/dispatch-resolve.d.ts.map +1 -0
  13. package/dist/commands/dispatch-resolve.js +105 -0
  14. package/dist/commands/dispatch-resolve.js.map +1 -0
  15. package/dist/commands/dispatch.d.ts.map +1 -1
  16. package/dist/commands/dispatch.js +40 -9
  17. package/dist/commands/dispatch.js.map +1 -1
  18. package/dist/commands/flywheel/k5-validate.d.ts +31 -0
  19. package/dist/commands/flywheel/k5-validate.d.ts.map +1 -1
  20. package/dist/commands/flywheel/k5-validate.js +80 -19
  21. package/dist/commands/flywheel/k5-validate.js.map +1 -1
  22. package/dist/commands/kg-classify.d.ts.map +1 -1
  23. package/dist/commands/kg-classify.js +20 -0
  24. package/dist/commands/kg-classify.js.map +1 -1
  25. package/dist/commands/kg-doctor.d.ts +67 -6
  26. package/dist/commands/kg-doctor.d.ts.map +1 -1
  27. package/dist/commands/kg-doctor.js +126 -46
  28. package/dist/commands/kg-doctor.js.map +1 -1
  29. package/dist/commands/list.d.ts +27 -0
  30. package/dist/commands/list.d.ts.map +1 -1
  31. package/dist/commands/list.js +67 -19
  32. package/dist/commands/list.js.map +1 -1
  33. package/dist/commands/memory/status.d.ts +18 -0
  34. package/dist/commands/memory/status.d.ts.map +1 -1
  35. package/dist/commands/memory/status.js +38 -2
  36. package/dist/commands/memory/status.js.map +1 -1
  37. package/dist/commands/memory-service-container.d.ts +44 -0
  38. package/dist/commands/memory-service-container.d.ts.map +1 -1
  39. package/dist/commands/memory-service-container.js +49 -0
  40. package/dist/commands/memory-service-container.js.map +1 -1
  41. package/dist/commands/ps.d.ts +32 -0
  42. package/dist/commands/ps.d.ts.map +1 -1
  43. package/dist/commands/ps.js +34 -0
  44. package/dist/commands/ps.js.map +1 -1
  45. package/dist/commands/runbooks.d.ts +32 -0
  46. package/dist/commands/runbooks.d.ts.map +1 -1
  47. package/dist/commands/runbooks.js +79 -22
  48. package/dist/commands/runbooks.js.map +1 -1
  49. package/dist/commands/skills-source.d.ts.map +1 -1
  50. package/dist/commands/skills-source.js +77 -2
  51. package/dist/commands/skills-source.js.map +1 -1
  52. package/dist/commands/upgrade-history.d.ts +0 -2
  53. package/dist/commands/upgrade-history.d.ts.map +1 -1
  54. package/dist/commands/upgrade-history.js +0 -6
  55. package/dist/commands/upgrade-history.js.map +1 -1
  56. package/dist/commands/upgrade-lock.d.ts +0 -9
  57. package/dist/commands/upgrade-lock.d.ts.map +1 -1
  58. package/dist/commands/upgrade-lock.js +1 -1
  59. package/dist/commands/upgrade-lock.js.map +1 -1
  60. package/dist/commands/world-snapshot.d.ts +13 -0
  61. package/dist/commands/world-snapshot.d.ts.map +1 -1
  62. package/dist/commands/world-snapshot.js +81 -1
  63. package/dist/commands/world-snapshot.js.map +1 -1
  64. package/dist/commands/yolo.d.ts +0 -4
  65. package/dist/commands/yolo.d.ts.map +1 -1
  66. package/dist/commands/yolo.js +2 -2
  67. package/dist/commands/yolo.js.map +1 -1
  68. package/dist/image-digests.json +8 -8
  69. package/dist/index.js +3403 -2459
  70. package/dist/lib/anthropic-base-url-file.d.ts +37 -0
  71. package/dist/lib/anthropic-base-url-file.d.ts.map +1 -0
  72. package/dist/lib/anthropic-base-url-file.js +46 -0
  73. package/dist/lib/anthropic-base-url-file.js.map +1 -0
  74. package/dist/lib/auth-remote.d.ts +9 -0
  75. package/dist/lib/auth-remote.d.ts.map +1 -1
  76. package/dist/lib/auth-remote.js +19 -4
  77. package/dist/lib/auth-remote.js.map +1 -1
  78. package/dist/lib/cf-access-token.d.ts +32 -0
  79. package/dist/lib/cf-access-token.d.ts.map +1 -0
  80. package/dist/lib/cf-access-token.js +52 -0
  81. package/dist/lib/cf-access-token.js.map +1 -0
  82. package/dist/lib/config.d.ts +17 -3
  83. package/dist/lib/config.d.ts.map +1 -1
  84. package/dist/lib/config.js +28 -4
  85. package/dist/lib/config.js.map +1 -1
  86. package/dist/lib/kubectl-context.d.ts +49 -0
  87. package/dist/lib/kubectl-context.d.ts.map +1 -1
  88. package/dist/lib/kubectl-context.js +64 -2
  89. package/dist/lib/kubectl-context.js.map +1 -1
  90. package/dist/lib/upgrade-kubernetes.d.ts +7 -0
  91. package/dist/lib/upgrade-kubernetes.d.ts.map +1 -1
  92. package/dist/lib/upgrade-kubernetes.js +35 -8
  93. package/dist/lib/upgrade-kubernetes.js.map +1 -1
  94. package/dist/mcp-server.js +1457 -1004
  95. package/hermes-bundle/version.json +1 -1
  96. package/host-cp/k8s/manifests/45-pvc.yaml +6 -2
  97. package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
  98. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  99. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  100. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  101. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  102. package/host-cp/observability/trace-summary.mjs +267 -0
  103. package/host-cp/src/bootstrap-selective.mjs +30 -28
  104. package/host-cp/src/host-stream.mjs +52 -0
  105. package/host-cp/src/redirect.mjs +7 -0
  106. package/host-cp/src/router.mjs +168 -0
  107. package/host-cp/src/serve-only-config.mjs +85 -0
  108. package/host-cp/src/server.mjs +346 -217
  109. package/host-cp/src/world-services.mjs +136 -0
  110. package/package.json +1 -1
@@ -45,13 +45,14 @@ import {
45
45
  createNdjsonSpanSink,
46
46
  attachBetaResponseEvents,
47
47
  } from '../observability/ndjson-span-sink.mjs';
48
- import { betaResponseEmitter } from '@olam/auth-client';
48
+ import { betaResponseEmitter, cfAccessHeaders } from '@olam/auth-client';
49
49
  import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
50
50
  import { detectHaltChunk } from './halt-detect.mjs';
51
51
  import { evaluateRedirect, applyRedirect } from './redirect.mjs';
52
52
  import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
53
53
  import { isPlanningPath } from './bootstrap-selective.mjs';
54
54
  import { parseProxyPath, perWorldBase, proxyToWorld } from './proxy.mjs';
55
+ import { fetchWorldServices as fetchWorldServicesImpl } from './world-services.mjs';
55
56
  import { resolveHostCpEngine } from './engine-identity.mjs';
56
57
  import { StartupToken } from './auth.mjs';
57
58
  import { SseGate, isSsePath, wireRelease } from './sse-gate.mjs';
@@ -95,6 +96,7 @@ import {
95
96
  import { instrumentHandler, renderMetrics } from './metrics.mjs';
96
97
  import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
97
98
  import { emitTierSuggestion } from '../dispatch/auto-tier-scheduler.mjs';
99
+ import { isServeOnly, isOrchestrationRoute, ORCHESTRATION_UNAVAILABLE } from './serve-only-config.mjs';
98
100
 
99
101
  // ── Deployment-mode detection ─────────────────────────────────────
100
102
  //
@@ -113,6 +115,17 @@ const HOST_CP_MODE = process.env.OLAM_HOST_CP_MODE
113
115
  ?? (fs.existsSync('/.dockerenv') ? 'container' : 'bare');
114
116
  const WORLD_HOST = HOST_CP_MODE === 'container' ? 'host.docker.internal' : '127.0.0.1';
115
117
 
118
+ // SERVE-ONLY mode (host-cp-gke-serve-only-mode Phase A). When
119
+ // OLAM_HOST_CP_SERVE_ONLY=true, host-cp serves plan-chat-spa + host-native
120
+ // `/api/*` only: NO docker transport connect, NO world discovery, NO
121
+ // PlanOrchestrator docker wiring, NO pr-merge-poller docker/repo deps.
122
+ // World-orchestration routes return a structured 503. Defaults OFF — FULL
123
+ // (local docker/k3d) mode is byte-for-byte unchanged. See
124
+ // ./serve-only-config.mjs for the pure gate decision (unit-tested there;
125
+ // server.mjs can't be imported in a test because it binds a port + connects
126
+ // docker at module load).
127
+ const SERVE_ONLY = isServeOnly(process.env);
128
+
116
129
  // Container-engine identity, surfaced to olam-cli via the X-Olam-Engine
117
130
  // response header on /health. Resolution lives in engine-identity.mjs so
118
131
  // unit tests can import the pure function without triggering server startup.
@@ -232,9 +245,28 @@ async function refreshVersionSnapshot() {
232
245
  }
233
246
  }
234
247
 
235
- // Kick off an initial check immediately, then poll every 60s.
236
- refreshVersionSnapshot();
237
- const versionPollTimer = setInterval(refreshVersionSnapshot, VERSION_POLL_INTERVAL_MS);
248
+ // SERVE-ONLY: the version snapshot polls the operator-repo HEAD + docker
249
+ // image SHAs every 60s — neither exists on a managed cluster (buildVersionSnapshot
250
+ // is fail-soft and would return all-'unknown', but the docker fetches are futile).
251
+ // Seed a static all-'unknown' snapshot so GET /api/version/status returns 200
252
+ // 'unknown' (not 503 pending) and skip the poll. clearInterval(null) is a no-op.
253
+ const UNKNOWN_VERSION_SNAPSHOT = {
254
+ hostCp: { running: process.env.OLAM_BUILD_SHA ?? 'unknown', latest: 'unknown', upgradeAvailable: false },
255
+ authService: { running: 'unknown', latest: 'unknown', upgradeAvailable: false },
256
+ devbox: { running: 'unknown', latest: 'unknown', upgradeAvailable: false },
257
+ operatorHead: 'unknown',
258
+ checkedAt: new Date().toISOString(),
259
+ cliVersion: process.env.OLAM_CLI_VERSION ?? 'unknown',
260
+ };
261
+
262
+ let versionPollTimer = null;
263
+ if (SERVE_ONLY) {
264
+ versionSnapshot = UNKNOWN_VERSION_SNAPSHOT;
265
+ } else {
266
+ // Kick off an initial check immediately, then poll every 60s.
267
+ refreshVersionSnapshot();
268
+ versionPollTimer = setInterval(refreshVersionSnapshot, VERSION_POLL_INTERVAL_MS);
269
+ }
238
270
 
239
271
  // ── World registry — persistent + admin-managed ───────────────────────
240
272
  //
@@ -254,6 +286,39 @@ const REGISTRY_PATH =
254
286
  ? '/data/host-cp-registry.json'
255
287
  : path.join(os.homedir(), '.olam', 'host-cp-registry.json'));
256
288
 
289
+ /**
290
+ * Read the cloud-mode Anthropic proxy URL configured by the operator.
291
+ *
292
+ * Resolution order mirrors packages/adapters/src/shared/anthropic-base-url.ts
293
+ * and packages/auth-client/src/cloud-mode.ts:
294
+ * 1. OLAM_ANTHROPIC_BASE_URL env var
295
+ * 2. ~/.olam/anthropic-base-url file
296
+ * 3. ANTHROPIC_BASE_URL env var
297
+ * 4. '' (empty — skip injection)
298
+ *
299
+ * Called on each plan-creation and cloud-dispatch request (not cached at
300
+ * startup) so operators can update the file without restarting host-cp.
301
+ *
302
+ * @returns {string}
303
+ */
304
+ function readAnthropicBaseUrl() {
305
+ const fromOlamEnv = process.env['OLAM_ANTHROPIC_BASE_URL'];
306
+ if (fromOlamEnv && fromOlamEnv.length > 0) return fromOlamEnv.trim();
307
+
308
+ try {
309
+ const file = path.join(os.homedir(), '.olam', 'anthropic-base-url');
310
+ const content = fs.readFileSync(file, 'utf-8').trim();
311
+ if (content.length > 0) return content;
312
+ } catch {
313
+ // file absent — fall through
314
+ }
315
+
316
+ const fromShellEnv = process.env['ANTHROPIC_BASE_URL'];
317
+ if (fromShellEnv && fromShellEnv.length > 0) return fromShellEnv.trim();
318
+
319
+ return '';
320
+ }
321
+
257
322
  /** @type {Record<string, number>} */
258
323
  let WORLDS = {};
259
324
 
@@ -414,7 +479,11 @@ const prPoller = createPrMergePoller({
414
479
  pollIntervalMs: PR_POLL_INTERVAL_MS,
415
480
  gracePeriodMs: MERGE_GRACE_MS,
416
481
  });
417
- prPoller.start();
482
+ // SERVE-ONLY: pr-merge-poller polls GH for merged PRs then destroys worlds
483
+ // via docker. No docker on a managed cluster — don't start the poll loop.
484
+ // (The poller object is still constructed so the shutdown handler's
485
+ // prPoller.stop() stays a no-op; start() is the docker/repo-touching part.)
486
+ if (!SERVE_ONLY) prPoller.start();
418
487
 
419
488
  // ── Worlds-DB reconcile loop ────────────────────────────────────
420
489
  //
@@ -422,24 +491,31 @@ prPoller.start();
422
491
  // (e.g., host-cp started after `olam create`). This reconciler bridges
423
492
  // that gap: it reads worlds.db and registers any running worlds that
424
493
  // aren't already in WORLDS.
425
- const worldsDbReconciler = startWorldsDbReconciler({
426
- dbPath: WORLDS_DB_PATH,
427
- dockerHost: DOCKER_HOST,
428
- worldHost: WORLD_HOST,
429
- getRegistry: () => WORLDS,
430
- onWorldAdded: (id, port) => {
431
- WORLDS = { ...WORLDS, [id]: port };
432
- persistRegistry();
433
- },
434
- onWorldRemoved: (id) => {
435
- if (id in WORLDS) {
436
- const next = { ...WORLDS };
437
- delete next[id];
438
- WORLDS = next;
439
- persistRegistry();
440
- }
441
- },
442
- });
494
+ //
495
+ // SERVE-ONLY: reconciliation reads worlds.db + probes docker for each
496
+ // world's host port. No worlds.db / docker on a managed cluster — skip it;
497
+ // WORLDS stays empty. `null` sentinel keeps the shutdown handler's
498
+ // `worldsDbReconciler?.stop()` a safe no-op.
499
+ const worldsDbReconciler = SERVE_ONLY
500
+ ? null
501
+ : startWorldsDbReconciler({
502
+ dbPath: WORLDS_DB_PATH,
503
+ dockerHost: DOCKER_HOST,
504
+ worldHost: WORLD_HOST,
505
+ getRegistry: () => WORLDS,
506
+ onWorldAdded: (id, port) => {
507
+ WORLDS = { ...WORLDS, [id]: port };
508
+ persistRegistry();
509
+ },
510
+ onWorldRemoved: (id) => {
511
+ if (id in WORLDS) {
512
+ const next = { ...WORLDS };
513
+ delete next[id];
514
+ WORLDS = next;
515
+ persistRegistry();
516
+ }
517
+ },
518
+ });
443
519
 
444
520
  // ── Plan orchestrator (Phase 1 spike) ─────────────────────────────────────
445
521
  //
@@ -533,7 +609,10 @@ function scheduleServersSnapshot() {
533
609
  }, SERVERS_SNAPSHOT_DEBOUNCE_MS);
534
610
  }
535
611
 
536
- const stopEvents = subscribeDockerEvents({
612
+ // SERVE-ONLY: docker-events subscription opens a long-poll against the
613
+ // docker /events stream — no docker on a managed cluster. Skip the
614
+ // subscription; `stopEvents` is a no-op so the shutdown handler is safe.
615
+ const stopEvents = SERVE_ONLY ? () => {} : subscribeDockerEvents({
537
616
  dockerHost: DOCKER_HOST,
538
617
  onWorldRestart: (worldId) => {
539
618
  cache.invalidate(worldId);
@@ -927,6 +1006,18 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
927
1006
  // Anything that doesn't match a static file falls through to the auth
928
1007
  // gate + 404 below (preserves the JSON-error contract for unknown
929
1008
  // /api/* paths).
1009
+ //
1010
+ // Phase A serve-only: world-ORCHESTRATION routes degrade to a structured
1011
+ // 503 BEFORE static-serve. This must run pre-static so (a) a GET
1012
+ // /v1/worlds/<id>/status can't be served the SPA HTML shell, and (b) a
1013
+ // POST /api/worlds/<id>/tunnels / DELETE /api/worlds/<id> mutation can't
1014
+ // execute (no docker on a managed cluster; honest degradation, not a
1015
+ // hollow shell). Method-aware: bare GET /api/worlds (list) is NOT blocked
1016
+ // (returns []). No-op in full mode (SERVE_ONLY false). See CP3 finding.
1017
+ if (SERVE_ONLY && isOrchestrationRoute(url.pathname, req.method)) {
1018
+ return jsonReply(res, 503, ORCHESTRATION_UNAVAILABLE);
1019
+ }
1020
+
930
1021
  if (req.method === 'GET' || req.method === 'HEAD') {
931
1022
  const served = await tryServeStatic(req, res, url.pathname);
932
1023
  if (served) return;
@@ -1690,6 +1781,13 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
1690
1781
  }
1691
1782
  }
1692
1783
 
1784
+ // SERVE-ONLY: every `/api/world/<id>/...` route is world orchestration —
1785
+ // it needs docker (proxy to a per-world CP, ttyd, secret fetch, progress
1786
+ // probe). On a managed cluster there's no docker + WORLDS is empty, so all such routes return 503.
1787
+ // (serve-only world-orchestration 503 is handled earlier, pre-static, by
1788
+ // the isOrchestrationRoute guard — it covers /api/world/, /api/worlds/<id>,
1789
+ // and /v1/worlds/ for all methods, so no per-route guard is needed here.)
1790
+
1693
1791
  // GET /api/world/<id>/progress — phase ladder progress for inbox row.
1694
1792
  const progressMatch = /^\/api\/world\/([^/?#]+)\/progress\/?$/.exec(url.pathname);
1695
1793
  if (progressMatch && req.method === 'GET') {
@@ -2374,6 +2472,23 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2374
2472
  // current SPA model; A11 vault-sync can refine the mapping).
2375
2473
  const planId = parsed.session_id ?? 'default';
2376
2474
  const basicAuth = Buffer.from(`operator:${showcasePw}`).toString('base64');
2475
+
2476
+ // Gap 3: enrich the dispatch body with the operator's anthropicBaseUrl
2477
+ // so plan-DO can propagate it to spawned CF Sandbox child worlds.
2478
+ // Only injected when not already set by the SPA (SPA has no auth-worker
2479
+ // config knowledge — host-cp is the sole injection point).
2480
+ const anthropicBaseUrl = readAnthropicBaseUrl();
2481
+ const enriched = anthropicBaseUrl && !parsed.anthropicBaseUrl
2482
+ ? JSON.stringify({ ...parsed, anthropicBaseUrl })
2483
+ : body;
2484
+
2485
+ // Phase H h2: attach CF Access service-token headers when configured
2486
+ // (machine-to-machine auth). Additive alongside Basic auth. CF Access
2487
+ // headers are validated at the EDGE of origins behind a CF Access app
2488
+ // (e.g. auth-worker.kaluga.co). They are inert on same-account service-
2489
+ // binding hops (plan-DO) because those bypass the CF Access edge; a CF
2490
+ // Access app in front of plan-DO would still not receive service-binding
2491
+ // traffic. See docs/runbooks/cf-access-service-token.md.
2377
2492
  const upstream = await fetch(
2378
2493
  `${cloudUrl.replace(/\/+$/, '')}/v1/dispatch?plan_id=${encodeURIComponent(planId)}`,
2379
2494
  {
@@ -2381,8 +2496,9 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2381
2496
  headers: {
2382
2497
  'Authorization': `Basic ${basicAuth}`,
2383
2498
  'content-type': 'application/json',
2499
+ ...cfAccessHeaders(),
2384
2500
  },
2385
- body,
2501
+ body: enriched,
2386
2502
  },
2387
2503
  );
2388
2504
  const upstreamBody = await upstream.text();
@@ -2398,6 +2514,72 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2398
2514
  }
2399
2515
  }
2400
2516
 
2517
+ // /api/plans/create — Gap 3 plan-creation handshake (Phase H h2 v1 dogfood).
2518
+ //
2519
+ // Accepts a plan-creation request from the SPA, enriches it with the
2520
+ // operator's anthropicBaseUrl (env var or ~/.olam/anthropic-base-url file), and forwards
2521
+ // it to plan-DO's /v1/plans/create so plan-DO stores the bearer URL for
2522
+ // subsequent dispatches + spawned CF Sandbox child worlds.
2523
+ //
2524
+ // Config:
2525
+ // OLAM_CLOUD_URL — plan-DO deployed URL (e.g. https://plan-agent-do.workers.dev)
2526
+ // OLAM_SHOWCASE_PASSWORD — showcase Basic auth password
2527
+ //
2528
+ // Returns 503 when cloud is not configured — operators using local Docker
2529
+ // mode skip this; the SPA treats 503 as a non-fatal degraded state.
2530
+ if (url.pathname === '/api/plans/create' && req.method === 'POST') {
2531
+ const cloudUrl = process.env.OLAM_CLOUD_URL;
2532
+ const showcasePw = process.env.OLAM_SHOWCASE_PASSWORD;
2533
+ if (!cloudUrl || !showcasePw) {
2534
+ return jsonReply(res, 503, {
2535
+ error: 'cloud_not_configured',
2536
+ message: 'OLAM_CLOUD_URL + OLAM_SHOWCASE_PASSWORD not set; plan-DO bearer propagation skipped.',
2537
+ });
2538
+ }
2539
+ try {
2540
+ const reqChunks = [];
2541
+ for await (const c of req) reqChunks.push(c);
2542
+ let parsed = {};
2543
+ try { parsed = JSON.parse(Buffer.concat(reqChunks).toString('utf8') || '{}'); } catch {
2544
+ // Non-fatal: body is optional — caller may POST with no body to trigger
2545
+ // bearer registration without additional plan metadata.
2546
+ }
2547
+
2548
+ // Enrich with anthropicBaseUrl from the host config.
2549
+ const anthropicBaseUrl = readAnthropicBaseUrl();
2550
+ const planId = parsed.planId ?? parsed.session_id ?? `plan-${Date.now()}`;
2551
+ const requestBody = { ...parsed, planId, ...(anthropicBaseUrl ? { anthropicBaseUrl } : {}) };
2552
+
2553
+ const basicAuth = Buffer.from(`operator:${showcasePw}`).toString('base64');
2554
+ // Phase H h2: attach CF Access service-token headers when configured.
2555
+ // See the /api/cloud-dispatch handler above + the runbook for why these
2556
+ // are additive (kept alongside Basic) and edge-validated only on origins
2557
+ // behind a CF Access app — inert on same-account service-binding hops.
2558
+ const upstream = await fetch(
2559
+ `${cloudUrl.replace(/\/+$/, '')}/v1/plans/create?plan_id=${encodeURIComponent(planId)}`,
2560
+ {
2561
+ method: 'POST',
2562
+ headers: {
2563
+ 'Authorization': `Basic ${basicAuth}`,
2564
+ 'content-type': 'application/json',
2565
+ ...cfAccessHeaders(),
2566
+ },
2567
+ body: JSON.stringify(requestBody),
2568
+ },
2569
+ );
2570
+ const upstreamBody = await upstream.text();
2571
+ res.statusCode = upstream.status;
2572
+ res.setHeader('content-type', upstream.headers.get('content-type') ?? 'application/json');
2573
+ res.setHeader('cache-control', 'no-store');
2574
+ return res.end(upstreamBody);
2575
+ } catch (err) {
2576
+ return jsonReply(res, 502, {
2577
+ error: 'plans_create_proxy_failed',
2578
+ message: err.message,
2579
+ });
2580
+ }
2581
+ }
2582
+
2401
2583
  // GET /api/worlds/:id/processes
2402
2584
  // GET /api/worlds/:id/processes/stream — SSE fanout (5s cadence, per-world)
2403
2585
  // Handler: routes/process-port.mjs → handleListProcesses
@@ -2795,148 +2977,54 @@ function handleAuthEvents(req, res) {
2795
2977
  //
2796
2978
  // Fetch port bindings for a world's container via docker-socket-proxy
2797
2979
  // inspect. Returns [{name, host_port, internal_port, url}] tagged with
2798
- // well-known internal ports.
2799
-
2800
- const WELL_KNOWN_PORTS = {
2801
- 3000: 'atlas-core (Rails)',
2802
- 5175: 'diner-app (Vite)',
2803
- 7681: 'Terminal (ttyd)',
2804
- 7682: 'Terminal Shell (ttyd)',
2805
- 8080: 'Per-world CP',
2806
- };
2807
-
2808
- /**
2809
- * Quick liveness probe against a service URL. Returns true if the
2810
- * service responds with ANY HTTP response (1xx-5xx) — we don't care
2811
- * about status codes because each app has its own conventions (Vite
2812
- * 200s on /, ttyd may 401, Rails may 500 on /, the per-world CP 200s).
2813
- * What matters is that something is listening.
2814
- *
2815
- * Probed from inside the host-cp container so we use HOST_FOR_WORLD
2816
- * (host.docker.internal on macOS/Windows, 172.17.0.1 on Linux) — the
2817
- * SPA's own 127.0.0.1:<port> URL is unreachable from container-side.
2818
- *
2819
- * Tight 800ms timeout. Worst case: 4 services × 800ms in parallel ≤ 1s
2820
- * added to the /api/worlds response — acceptable for a 4s poll cycle.
2821
- */
2822
- async function probeServiceLive(hostPort) {
2823
- const probeUrl = `http://${HOST_FOR_WORLD}:${hostPort}/`;
2824
- try {
2825
- const res = await fetch(probeUrl, {
2826
- method: 'HEAD',
2827
- signal: AbortSignal.timeout(800),
2828
- redirect: 'manual',
2829
- });
2830
- return res.status > 0;
2831
- } catch {
2832
- // ECONNREFUSED, timeout, DNS — anything counts as not-live. Try
2833
- // GET as a fallback because some servers (e.g. ttyd) close on HEAD
2834
- // and we don't want false negatives from picky upstream behavior.
2835
- try {
2836
- const res2 = await fetch(probeUrl, {
2837
- method: 'GET',
2838
- signal: AbortSignal.timeout(800),
2839
- redirect: 'manual',
2840
- });
2841
- return res2.status > 0;
2842
- } catch {
2843
- return false;
2844
- }
2845
- }
2846
- }
2847
-
2848
- /**
2849
- * Get the running container's port bindings from socket-proxy + map
2850
- * each to a clickable URL. Each service is then probed in parallel
2851
- * for actual reachability — the docker port mapping just tells us
2852
- * what's CONFIGURED; the probe confirms what's actually LISTENING.
2853
- *
2854
- * Returns [] on any docker-inspect failure (container missing, socket-
2855
- * proxy down) so the API still returns a valid worlds list.
2856
- *
2857
- * @param {string} worldId
2858
- * @returns {Promise<Array<{name: string, host_port: number, internal_port: number, url: string, live: boolean}>>}
2859
- */
2860
- async function fetchWorldServices(worldId) {
2861
- const containerName = `olam-${worldId}-devbox`;
2862
- let data;
2863
- try {
2864
- if (DOCKER_HOST === 'docker-cli') {
2865
- // Bare-node mode: shell out to `docker inspect` instead of HTTP.
2866
- // Same fix pattern as fetchContainerSecret (PR #108). Without
2867
- // this, the services array is always empty in bare-node and the
2868
- // SPA can't find the ttyd host port → terminal renders blank.
2869
- const { spawnSync } = await import('node:child_process');
2870
- const result = spawnSync(
2871
- 'docker',
2872
- ['inspect', containerName],
2873
- { encoding: 'utf-8', timeout: 2000 },
2874
- );
2875
- if (result.status !== 0) return [];
2876
- const arr = JSON.parse(result.stdout || '[]');
2877
- data = Array.isArray(arr) && arr.length > 0 ? arr[0] : null;
2878
- if (!data) return [];
2879
- } else {
2880
- const apiBase = DOCKER_HOST.replace(/^tcp:\/\//, 'http://');
2881
- const res = await fetch(`${apiBase}/containers/${encodeURIComponent(containerName)}/json`, {
2882
- signal: AbortSignal.timeout(2000),
2883
- });
2884
- if (!res.ok) return [];
2885
- data = await res.json();
2886
- }
2887
- const ports = data?.NetworkSettings?.Ports ?? {};
2888
- const draft = [];
2889
- for (const [internal, bindings] of Object.entries(ports)) {
2890
- if (!Array.isArray(bindings) || bindings.length === 0) continue;
2891
- const internalPort = parseInt(internal.split('/')[0], 10);
2892
- const hostPort = parseInt(bindings[0].HostPort, 10);
2893
- if (!Number.isFinite(internalPort) || !Number.isFinite(hostPort)) continue;
2894
- draft.push({
2895
- name: WELL_KNOWN_PORTS[internalPort] ?? `App (port ${internalPort})`,
2896
- host_port: hostPort,
2897
- internal_port: internalPort,
2898
- url: `http://127.0.0.1:${hostPort}`,
2899
- });
2900
- }
2901
-
2902
- // Probe each service in parallel for actual reachability. Adds a
2903
- // `live: boolean` field. The UI dims chips for non-live services
2904
- // so operators can see what's configured-but-down vs configured-
2905
- // and-up at a glance.
2906
- const liveResults = await Promise.all(
2907
- draft.map((s) => probeServiceLive(s.host_port)),
2908
- );
2909
- const services = draft.map((s, i) => ({ ...s, live: liveResults[i] }));
2910
-
2911
- // Stable order: well-known ports first (CP, then Rails/Vite, then terminal).
2912
- services.sort((a, b) => a.internal_port - b.internal_port);
2913
- return services;
2914
- } catch {
2915
- return [];
2916
- }
2980
+ // well-known internal ports. The probe + enrichment logic lives in
2981
+ // ./world-services.mjs (extracted for isolated unit testing); this thin
2982
+ // wrapper binds the host-specific HOST_FOR_WORLD / DOCKER_HOST module
2983
+ // constants so callers keep the single-arg `fetchWorldServices(worldId)`
2984
+ // signature (used at the createLocalWorldsSource wiring above).
2985
+ function fetchWorldServices(worldId) {
2986
+ return fetchWorldServicesImpl(worldId, {
2987
+ hostForWorld: HOST_FOR_WORLD,
2988
+ dockerHost: DOCKER_HOST,
2989
+ });
2917
2990
  }
2918
2991
 
2919
2992
  // ── Static file serving (Phase F-2-D dogfood fix) ──────────────────
2920
2993
  //
2921
- // SPA dist/ is at /app/dist/ inside the container (see Dockerfile).
2922
- // In bare-node mode, the SPA build lives in packages/control-plane/public
2923
- // (where the workspace's `npm run build` writes it). The legacy
2924
- // packages/host-cp/dist used to be hand-tarballed but can drift out of
2925
- // sync with the index.html→bundle hash mapping; prefer public/ when it
2926
- // exists so a stale dist doesn't 404 on /assets/<hash>.js.
2927
-
2994
+ // SPA dist/ is at /app/dist/ inside the container (see Dockerfile; the
2995
+ // build stages plan-chat-spa's dist/client there as of Phase E5).
2996
+ // In bare-node mode, the SPA build lives in
2997
+ // packages/plan-chat-spa/dist/client (where `vite build` writes it as of
2998
+ // the Phase E5 ATOMIC SERVING CUTOVER: plan-chat-spa supersedes
2999
+ // control-plane as host-cp's sole served SPA). The legacy
3000
+ // control-plane/public candidates are NOT retained as a fallback: a
3001
+ // host-cp running against a not-yet-rebuilt worktree serves no SPA rather
3002
+ // than the old control-plane shell (see the fail-closed candidate list
3003
+ // below). The legacy packages/host-cp/dist used to be hand-tarballed
3004
+ // but can drift out of sync with the index.html→bundle hash mapping;
3005
+ // prefer the freshly-built dist/client when it exists.
3006
+
3007
+ // Phase E5 (ATOMIC SERVING CUTOVER) — FAIL-CLOSED candidate list.
3008
+ // Every candidate now resolves to a plan-chat-spa build. The retired
3009
+ // control-plane/public candidates were REMOVED (per /codex:rescue on the
3010
+ // cutover): keeping them meant a missing/stale plan-chat-spa build would
3011
+ // silently fall back to serving the OLD control-plane shell and look
3012
+ // superficially healthy. Now an absent plan-chat-spa dist serves nothing
3013
+ // (ENOENT → SPA-shell 404) — a loud failure, not a silent wrong-SPA serve.
3014
+ // - /app/dist — container (Dockerfile stages plan-chat-spa here)
3015
+ // - packages/plan-chat-spa/dist/client — bare-node local (vite build output)
3016
+ // - packages/host-cp/dist — stage-host-cp-spa.sh output (also plan-chat-spa)
2928
3017
  const DIST_DIR = (() => {
2929
3018
  const candidates = [
2930
3019
  '/app/dist',
2931
- path.resolve(process.cwd(), 'packages/control-plane/public'),
2932
- path.resolve(process.cwd(), '../control-plane/public'),
2933
- path.resolve(process.cwd(), 'dist'),
3020
+ path.resolve(process.cwd(), 'packages/plan-chat-spa/dist/client'),
3021
+ path.resolve(process.cwd(), '../plan-chat-spa/dist/client'),
2934
3022
  path.resolve(process.cwd(), 'packages/host-cp/dist'),
2935
3023
  ];
2936
3024
  for (const c of candidates) {
2937
3025
  if (fs.existsSync(c) && fs.existsSync(path.join(c, 'index.html'))) return c;
2938
3026
  }
2939
- return '/app/dist'; // fallback; readFile will surface ENOENT
3027
+ return '/app/dist'; // fallback; readFile will surface ENOENT (fail-closed)
2940
3028
  })();
2941
3029
 
2942
3030
  const SPA_ROUTES = new Set(['/', '/worlds', '/workspaces', '/inbox', '/repos', '/runbooks', '/plan']);
@@ -3159,6 +3247,19 @@ const _spaCacheByKey = new Map();
3159
3247
  // and the token-comparison check skips reload when the cookie
3160
3248
  // already matches (so non-rotation 401s — e.g. genuine auth
3161
3249
  // failures — don't cause a refresh loop).
3250
+ // Phase E5 (ATOMIC SERVING CUTOVER): BOOTSTRAP_SCRIPT is NO LONGER
3251
+ // injected into the served SPA shell. host-cp now serves plan-chat-spa
3252
+ // exclusively, whose bundle re-homes the cookie-bootstrap +
3253
+ // world-fetch-rewrite + 401-recover shim (packages/plan-chat-spa/src/lib/
3254
+ // worldFetch.ts, installed at the top of src/main.tsx — Phase C). The
3255
+ // const is RETAINED, defined-but-unreferenced-in-render, because
3256
+ // scripts/audit-worker-bootstrap-parity.mjs extracts the `HN` (and `WP`)
3257
+ // arrays out of this literal via extractHN()/extractWP() and machine-gates
3258
+ // them byte-equal against worldFetch.ts's HOST_NATIVE_PREFIXES /
3259
+ // WORLD_PREFIXES. Deleting this const would make extractHN return null →
3260
+ // audit FAIL. Keep it as the canonical HN/WP-array parity source until
3261
+ // that audit is repointed at worldFetch.ts directly (follow-up).
3262
+ // eslint-disable-next-line no-unused-vars -- retained as HN/WP parity-audit source
3162
3263
  const BOOTSTRAP_SCRIPT = `<script>(function(){function ck(){var m=document.cookie.match(/olam_host_cp_token=([^;]+)/);return m?m[1]:'';}function sw(t){document.cookie='olam_host_cp_token='+t+'; path=/; samesite=strict';}try{var x=new XMLHttpRequest();x.open('GET','/api/bootstrap',false);x.send();if(x.status===200){var d=JSON.parse(x.responseText);sw(d.token);}}catch(e){console.error('[host-cp bootstrap]',e);}var reloading=false;function recover(){if(reloading)return;try{var x=new XMLHttpRequest();x.open('GET','/api/bootstrap',false);x.send();if(x.status===200){var d=JSON.parse(x.responseText);if(d.token&&ck()!==d.token){reloading=true;sw(d.token);console.warn('[host-cp auth recover] token rotated; reloading');location.reload();}}}catch(e){console.error('[host-cp auth recover]',e);}}var HN=['/api/bootstrap','/api/worlds','/api/projects','/api/workspaces','/api/workspaces/match','/api/repos','/api/runbooks','/api/auth','/api/host-stream','/api/plan-chat','/api/plan/agent-runtime','/health'];var WP=['/api/','/session/','/hooks/','/dispatch','/lanes','/codex/','/review/'];function sr(p){if(typeof p!=='string')return false;if(p.startsWith('/api/world/'))return false;for(var i=0;i<HN.length;i++){var n=HN[i];if(p===n||p.startsWith(n+'?')||p.startsWith(n+'/'))return false;}for(var j=0;j<WP.length;j++){var w=WP[j];if(p===w||p===w.replace(/\\/$/,'')||p.startsWith(w)||p.startsWith(w.replace(/\\/$/,'')+'?')||p.startsWith(w.replace(/\\/$/,'')+'/'))return true;}return false;}function wid(){var p=location.pathname;var m=p.match(/^\\/(world|inbox|session)\\/([^/?#]+)/);if(m)return m[2];if(/^\\/(?:worlds?|workspaces?|world|sandbox|session|inbox|plan|design|repos|runbooks|assets|api|health|favicon)($|\\/|\\?)/.test(p))return null;var r=p.match(/^\\/([a-z][a-z0-9-]+)(?:\\/|$|\\?)/);return r?r[1]:null;}function rw(p){var w=wid();return w?'/api/world/'+w+p:p;}var of=window.fetch.bind(window);window.fetch=function(input,init){var pr;if(typeof 
input==='string'&&sr(input))pr=of(rw(input),init);else if(input&&typeof input.url==='string'&&sr(input.url))pr=of(new Request(rw(input.url),input),init);else pr=of(input,init);return pr.then(function(res){if(res&&res.status===401)recover();return res;});};var OE=window.EventSource;if(OE){window.EventSource=function(u,i){var s=u;if(typeof s==='string'&&sr(s))s=rw(s);var es=new OE(s,i);es.addEventListener('error',function(){if(es.readyState===OE.CLOSED)recover();});return es;};window.EventSource.prototype=OE.prototype;window.EventSource.CONNECTING=OE.CONNECTING;window.EventSource.OPEN=OE.OPEN;window.EventSource.CLOSED=OE.CLOSED;}})();</script>`;
3163
3264
 
3164
3265
  /**
@@ -3181,35 +3282,40 @@ function buildPlanChatBearerInjection() {
3181
3282
  }
3182
3283
  }
3183
3284
 
3184
- // Phase D1 — Selective BOOTSTRAP_SCRIPT no-op.
3285
+ // Phase E5 (ATOMIC SERVING CUTOVER) — BOOTSTRAP_SCRIPT no longer injected.
3185
3286
  //
3186
- // Planning paths use plan-chat-spa's own readBearer() resolver
3187
- // (lib/bearer.ts) which reads window.__OLAM_PLAN_CHAT_BEARER__ injected
3188
- // inline OR falls back to the URL hash channel. They DO NOT need the
3189
- // host-cp bootstrap's cookie+fetch-rewrite shim. Non-planning surfaces
3190
- // (/workspaces, /repos, /runbooks, /design, /inbox, /world/:id/editor,
3191
- // /world/:id/events) still rely on bootstrap-injected cookie + the
3192
- // monkey-patched fetch/EventSource that rewrites world-scoped paths
3193
- // to /api/world/<id>/... — keep injecting for them until Phase E
3194
- // migrates each to a bootstrap-free pattern.
3287
+ // host-cp now serves plan-chat-spa exclusively. plan-chat-spa's own
3288
+ // bundle re-homes BOTH auth paths:
3289
+ // - readBearer() (lib/bearer.ts) reads window.__OLAM_PLAN_CHAT_BEARER__
3290
+ // injected inline below (the `bearerInjection`) OR falls back to the
3291
+ // URL-hash channel.
3292
+ // - the cookie-bootstrap + world-fetch-rewrite + 401-recover shim
3293
+ // (lib/worldFetch.ts, installed at the top of src/main.tsx) handles
3294
+ // the cookie + path-rewrite duties that host-cp's BOOTSTRAP_SCRIPT
3295
+ // used to perform.
3296
+ // So the served shell injects ONLY the bearer; the bootstrap shim is
3297
+ // dropped. isPlanningPath() (bootstrap-selective.mjs) is now a wildcard
3298
+ // (true for every string path) — this function relies on that to never
3299
+ // inject BOOTSTRAP_SCRIPT.
3195
3300
  //
3196
- // Reversal: edit BOOTSTRAP_NOOP_PLANNING_PATHS in bootstrap-selective.mjs
3197
- // to [] to restore universal injection. Single-line change.
3301
+ // Reversal: re-narrow isPlanningPath() in bootstrap-selective.mjs (see
3302
+ // the revert-seam note there) and restore the `bootstrapPart` branch
3303
+ // below if a surface ever needs host-cp's bootstrap again.
3198
3304
  //
3199
- // Per K1 SCP-3 + phase-d-tasks D1 acceptance.
3305
+ // Per K1 SCP-3 + phase-d-tasks D1 + phase-e-tasks E2 acceptance.
3200
3306
 
3201
3307
  async function renderSpaShell(filePath, pathname) {
3202
3308
  const stat = fs.statSync(filePath);
3203
3309
  const bearerInjection = buildPlanChatBearerInjection();
3204
- // Path-selective: planning paths skip the bootstrap shim entirely
3205
- // (plan-chat-spa's readBearer handles auth); non-planning paths
3206
- // retain it (Phase E will migrate them).
3310
+ // Phase E5: BOOTSTRAP_SCRIPT is never injected — plan-chat-spa's own
3311
+ // worldFetch.ts shim owns the cookie-bootstrap + path-rewrite contract.
3312
+ // We still call isPlanningPath() so the wildcard dependency stays visible
3313
+ // here. NOTE(review): the result is only voided below, not checked, so a
3314
+ // re-narrowing would not fail loudly unless a real assertion is added.
3207
3315
  const skipBootstrap = isPlanningPath(pathname);
3208
- const bootstrapPart = skipBootstrap ? '' : BOOTSTRAP_SCRIPT;
3209
- // Cache key includes bearer length AND the bootstrap-presence bit so
3210
- // /plan and /workspaces don't share a cached shell.
3211
- const cacheKey =
3212
- stat.mtimeMs + ':' + bearerInjection.length + ':' + (skipBootstrap ? '0' : '1');
3316
+ // Cache key includes bearer length (the only per-render-varying input
3317
+ // now that bootstrap injection is constant-empty).
3318
+ const cacheKey = stat.mtimeMs + ':' + bearerInjection.length;
3213
3319
  const cached = _spaCacheByKey.get(cacheKey);
3214
3320
  if (cached !== undefined) return cached;
3215
3321
  let html = fs.readFileSync(filePath, 'utf-8');
@@ -3219,15 +3325,11 @@ async function renderSpaShell(filePath, pathname) {
3219
3325
  // which 404s. Rewrite to absolute `/assets/` so all SPA shell paths
3220
3326
  // (/, /worlds, /workspaces, /world/<id>) reference the same bundle.
3221
3327
  html = html.replace(/(href|src)="\.\/assets\//g, '$1="/assets/');
3222
- // Inject right after <head> so the bootstrap runs before any other
3223
- // script tag on the page. Bearer injection runs after the host-cp
3224
- // bootstrap so window.__OLAM_PLAN_CHAT_BEARER__ is set before the
3225
- // SPA bundle reads it. On planning paths the bootstrap is empty —
3226
- // bearer injection still runs (plan-chat-spa reads it directly).
3227
- html = html.replace(
3228
- /<head>/i,
3229
- `<head>\n ${bootstrapPart}\n ${bearerInjection}`,
3230
- );
3328
+ // Inject the bearer right after <head> so window.__OLAM_PLAN_CHAT_BEARER__
3329
+ // is set before the SPA bundle reads it. No bootstrap shim — see the
3330
+ // block comment above (Phase E5 cutover).
3331
+ void skipBootstrap; // wildcard invariant: always true; documents intent
3332
+ html = html.replace(/<head>/i, `<head>\n ${bearerInjection}`);
3231
3333
  _spaCacheByKey.set(cacheKey, html);
3232
3334
  return html;
3233
3335
  }
@@ -3303,28 +3405,41 @@ server.on('upgrade', (req, clientSocket, head) => {
3303
3405
  }
3304
3406
  });
3305
3407
 
3306
- // Probe persisted tunnels on startup; mark unreachable ones stale.
3307
- tunnelManager.probeAllOnStartup().catch((err) => {
3308
- console.error(`tunnel startup probe failed: ${err.message}`);
3309
- });
3408
+ // SERVE-ONLY: everything below this point through reconcileWorldsWithDocker
3409
+ // is world-orchestration observability — tunnel probes, the worlds.db /
3410
+ // docker snapshot loops, the per-world activity tracker, and the boot-time
3411
+ // reconcile. None of it has a docker daemon / worlds.db / world tunnels on a
3412
+ // managed cluster. Skip it all in serve-only; the snapshot timers + tracker
3413
+ // stay unstarted so the shutdown handler's `?.`-guarded stops are safe.
3414
+ if (!SERVE_ONLY) {
3415
+ // Probe persisted tunnels on startup; mark unreachable ones stale.
3416
+ tunnelManager.probeAllOnStartup().catch((err) => {
3417
+ console.error(`tunnel startup probe failed: ${err.message}`);
3418
+ });
3310
3419
 
3311
- // Start the 1-Hz worlds.db hash-diff loop after the server boots so
3312
- // the initial broadcast happens once the route is reachable.
3313
- startWorldsSnapshotLoop();
3314
- // Phase B-bonus: start tunnel + listening snapshot loops. Both
3315
- // hash-debounce so idle windows produce zero broadcasts.
3316
- startTunnelsSnapshotLoop();
3317
- startListeningSnapshotLoop();
3420
+ // Start the 1-Hz worlds.db hash-diff loop after the server boots so
3421
+ // the initial broadcast happens once the route is reachable.
3422
+ startWorldsSnapshotLoop();
3423
+ // Phase B-bonus: start tunnel + listening snapshot loops. Both
3424
+ // hash-debounce so idle windows produce zero broadcasts.
3425
+ startTunnelsSnapshotLoop();
3426
+ startListeningSnapshotLoop();
3427
+ }
3318
3428
 
3319
3429
  // Closes #965: live thought_count + total_cost_usd updates from each
3320
3430
  // active world's Claude session JSONL. Periodic (60s default) so Rico's
3321
3431
  // scheduling loop can read fresh values from the `worlds` table and
3322
3432
  // SPAs can subscribe to the `world.activity.tick` event. Fail-soft per
3323
3433
  // world: missing/malformed JSONL never crashes the loop.
3324
- const worldActivityTracker = startWorldActivityTracker({
3325
- dbPath: WORLDS_DB_PATH,
3326
- broadcaster: hostStream,
3327
- });
3434
+ //
3435
+ // SERVE-ONLY: reads worlds.db (absent on a managed cluster). `null` sentinel
3436
+ // keeps the shutdown handler's `worldActivityTracker?.stop()` a no-op.
3437
+ const worldActivityTracker = SERVE_ONLY
3438
+ ? null
3439
+ : startWorldActivityTracker({
3440
+ dbPath: WORLDS_DB_PATH,
3441
+ broadcaster: hostStream,
3442
+ });
3328
3443
 
3329
3444
  // ── Phase 1a / B1 (PR3): engine-select + await-before-listen ─────
3330
3445
  //
@@ -3343,14 +3458,20 @@ const worldActivityTracker = startWorldActivityTracker({
3343
3458
  // resolve through the same async branch for symmetry — the call-site
3344
3459
  // migration to engine.* methods is a downstream task; today the engine
3345
3460
  // instance is held for /health diagnostic + future use.
3346
- const hostCpEngine = await (async () => {
3347
- if (HOST_CP_ENGINE === 'kubernetes') {
3348
- const { createKubernetesEngine } = await import('./engines/kubernetes.mjs');
3349
- return createKubernetesEngine({ env: process.env });
3350
- }
3351
- const { createDockerEngine } = await import('./engines/docker.mjs');
3352
- return createDockerEngine({ dockerHost: DOCKER_HOST });
3353
- })();
3461
+ // SERVE-ONLY: don't resolve a real container engine — there's no docker
3462
+ // daemon to talk to and the KubernetesEngine factory runs a context-
3463
+ // allowlist guard that has no managed-cluster meaning here. Use a minimal
3464
+ // inert engine descriptor so /health still reports an engine name.
3465
+ const hostCpEngine = SERVE_ONLY
3466
+ ? { engineName: 'serve-only', context: undefined }
3467
+ : await (async () => {
3468
+ if (HOST_CP_ENGINE === 'kubernetes') {
3469
+ const { createKubernetesEngine } = await import('./engines/kubernetes.mjs');
3470
+ return createKubernetesEngine({ env: process.env });
3471
+ }
3472
+ const { createDockerEngine } = await import('./engines/docker.mjs');
3473
+ return createDockerEngine({ dockerHost: DOCKER_HOST });
3474
+ })();
3354
3475
 
3355
3476
  // ── Boot-time worlds.db ↔ docker reconciler (issue #963) ─────────────
3356
3477
  //
@@ -3359,17 +3480,24 @@ const hostCpEngine = await (async () => {
3359
3480
  // world is running/active but the container is gone, mark it 'orphaned'.
3360
3481
  // Fail-soft: docker unreachable or DB unavailable → log + continue boot.
3361
3482
  // Runs BEFORE server.listen() so the first request sees reconciled state.
3362
- try {
3363
- await reconcileWorldsWithDocker({
3364
- dbPath: WORLDS_DB_PATH,
3365
- listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
3366
- });
3367
- } catch (err) {
3368
- console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
3483
+ //
3484
+ // SERVE-ONLY: no worlds.db / docker container list on a managed cluster.
3485
+ if (!SERVE_ONLY) {
3486
+ try {
3487
+ await reconcileWorldsWithDocker({
3488
+ dbPath: WORLDS_DB_PATH,
3489
+ listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
3490
+ });
3491
+ } catch (err) {
3492
+ console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
3493
+ }
3369
3494
  }
3370
3495
 
3371
3496
  server.listen(PORT, '0.0.0.0', () => {
3372
3497
  console.log(`olam-host-cp B3 listening on :${PORT}`);
3498
+ if (SERVE_ONLY) {
3499
+ console.log(' [serve-only] OLAM_HOST_CP_SERVE_ONLY=true — SPA + host-native /api/* only; world orchestration disabled (/api/world/* → 503 orchestration_unavailable).');
3500
+ }
3373
3501
  console.log(` DOCKER_HOST=${DOCKER_HOST}`);
3374
3502
  console.log(` cache TTL=${TTL_SEC}s`);
3375
3503
  console.log(` worlds known: ${Object.keys(WORLDS).join(', ') || '(none)'}`);
@@ -3404,11 +3532,12 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
3404
3532
  console.log(`received ${sig}, shutting down`);
3405
3533
  stopEvents();
3406
3534
  prPoller.stop();
3407
- worldsDbReconciler.stop();
3535
+ // worldsDbReconciler + worldActivityTracker are null in SERVE-ONLY mode.
3536
+ worldsDbReconciler?.stop();
3408
3537
  stopWorldsSnapshotLoop();
3409
3538
  stopTunnelsSnapshotLoop();
3410
3539
  stopListeningSnapshotLoop();
3411
- worldActivityTracker.stop();
3540
+ worldActivityTracker?.stop();
3412
3541
  if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
3413
3542
  hostStream.close();
3414
3543
  if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});