@pleri/olam-cli 0.1.170 → 0.1.174

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/agent-stream/driver-runner.js +13 -0
  2. package/dist/commands/auth.d.ts +22 -7
  3. package/dist/commands/auth.d.ts.map +1 -1
  4. package/dist/commands/auth.js +414 -46
  5. package/dist/commands/auth.js.map +1 -1
  6. package/dist/commands/create.d.ts.map +1 -1
  7. package/dist/commands/create.js +45 -1
  8. package/dist/commands/create.js.map +1 -1
  9. package/dist/commands/services.d.ts +39 -0
  10. package/dist/commands/services.d.ts.map +1 -1
  11. package/dist/commands/services.js +64 -9
  12. package/dist/commands/services.js.map +1 -1
  13. package/dist/from-manifest.d.ts +53 -0
  14. package/dist/from-manifest.d.ts.map +1 -0
  15. package/dist/from-manifest.js +95 -0
  16. package/dist/from-manifest.js.map +1 -0
  17. package/dist/image-digests.json +8 -8
  18. package/dist/index.js +911 -137
  19. package/dist/lib/auth-remote.d.ts +130 -0
  20. package/dist/lib/auth-remote.d.ts.map +1 -0
  21. package/dist/lib/auth-remote.js +307 -0
  22. package/dist/lib/auth-remote.js.map +1 -0
  23. package/dist/mcp-server.js +1487 -435
  24. package/hermes-bundle/version.json +1 -1
  25. package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
  26. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  27. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  28. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  29. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  30. package/host-cp/observability/ndjson-span-sink.mjs +52 -0
  31. package/host-cp/src/boot-reconciler.mjs +238 -0
  32. package/host-cp/src/linear-sync.mjs +43 -0
  33. package/host-cp/src/plan-chat-service.mjs +129 -1
  34. package/host-cp/src/port-bridge-manager.mjs +116 -10
  35. package/host-cp/src/server.mjs +121 -1
  36. package/host-cp/src/world-activity-tracker.mjs +392 -0
  37. package/package.json +1 -1
@@ -10,6 +10,7 @@ import path from 'node:path';
10
10
 
11
11
  const DOCKER_HOST = process.env.DOCKER_HOST ?? 'docker-cli';
12
12
  const SOCAT_IMAGE = 'alpine/socat';
13
+ const SOCAT_IMAGE_TAGGED = 'alpine/socat:latest';
13
14
  const HOST_PORT_MIN = 25000;
14
15
  const HOST_PORT_MAX = 25999;
15
16
  const INFRA_PORTS = new Set([8080, 7681, 7682]);
@@ -83,11 +84,73 @@ async function dockerApiBase() {
83
84
  }
84
85
 
85
86
  /**
86
- * Create and start a socat bridge container. Returns containerId.
87
+ * Detect whether a docker error message indicates the image is missing
88
+ * (and therefore a `docker pull` retry would help). Docker uses a handful
89
+ * of phrasings across CLI + HTTP API surfaces.
90
+ */
91
function isImageMissingError(message) {
  // Empty or absent output cannot describe a missing image.
  if (!message) {
    return false;
  }
  // One alternation covering the phrasings this module treats as "image not
  // present"; matching is case-insensitive.
  const missingImagePattern =
    /Unable to find image|pull access denied|manifest unknown|No such image|not found in (the )?(repository|registry)/i;
  return missingImagePattern.test(message);
}
97
+
98
/**
 * Pull alpine/socat:latest via docker CLI. Used by the bare-node bridge
 * create path's fallback retry. 60s budget — image is ~5MB; real pull
 * is typically <2s.
 *
 * Spawn-level failures (docker binary not found, the 60s timeout firing)
 * leave stderr/stdout empty and are only reported through `error` on the
 * spawnSync result, so that message is used as the last-resort diagnostic
 * instead of returning an empty string.
 *
 * @returns {{ok: boolean, stderr: string}} `ok` is true only when
 *   `docker pull` exited with status 0; `stderr` is the first non-empty of
 *   trimmed stderr, trimmed stdout, or the spawn error message.
 */
function pullSocatViaCli() {
  const r = spawnSync('docker', ['pull', SOCAT_IMAGE_TAGGED], {
    encoding: 'utf-8',
    timeout: 60_000,
  });
  // stderr/stdout may be null when the process never started, hence `?? ''`.
  const processOutput = (r.stderr ?? '').trim() || (r.stdout ?? '').trim();
  return {
    ok: r.status === 0,
    // Fall back to the spawn error (e.g. ENOENT / ETIMEDOUT) when the
    // process produced no output of its own.
    stderr: processOutput || (r.error?.message ?? ''),
  };
}
115
+
116
/**
 * Scan a drained `/images/create` progress stream for an error record.
 *
 * The stream is a sequence of JSON objects, one per line. A failed pull is
 * reported as an object carrying `error` and/or `errorDetail.message`.
 * Lines that are blank or not valid JSON are ignored.
 *
 * @param {string} streamBody — full text of the progress stream
 * @returns {string | null} the first error message found, or null
 */
function findPullStreamError(streamBody) {
  for (const rawLine of streamBody.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;
    let record;
    try {
      record = JSON.parse(line);
    } catch {
      // Not a JSON progress record — nothing to learn from it.
      continue;
    }
    const detail = record?.errorDetail?.message;
    if (typeof detail === 'string' && detail) return detail;
    if (typeof record?.error === 'string' && record.error) return record.error;
  }
  return null;
}

/**
 * Pull alpine/socat:latest via Docker HTTP API. Used by the container-mode
 * bridge create path's fallback retry. Streams the pull progress body so
 * Docker actually performs the pull (it's a streaming endpoint).
 *
 * A 2xx status alone is not treated as success: the drained progress stream
 * is also checked for an error record, because a pull can fail after the
 * response headers have already been sent.
 *
 * @param {string} apiBase — Docker HTTP API base URL
 * @returns {Promise<{ok: boolean, stderr: string}>} never rejects; network
 *   errors and timeouts are reported as `{ ok: false, stderr }`.
 */
async function pullSocatViaHttpApi(apiBase) {
  try {
    const resp = await fetch(
      `${apiBase}/images/create?fromImage=${encodeURIComponent(SOCAT_IMAGE)}&tag=latest`,
      { method: 'POST', signal: AbortSignal.timeout(60_000) },
    );
    if (!resp.ok) {
      const body = await resp.text().catch(() => '');
      return { ok: false, stderr: `pull failed: ${resp.status} ${body}` };
    }
    // Drain the streaming progress body — Docker only completes the pull
    // when the response is consumed.
    const progress = await resp.text();
    const streamError = findPullStreamError(progress);
    if (streamError !== null) {
      return { ok: false, stderr: `pull failed: ${streamError}` };
    }
    return { ok: true, stderr: '' };
  } catch (err) {
    return { ok: false, stderr: err?.message ?? String(err) };
  }
}
142
+
143
+ /**
144
+ * Create and start a socat bridge container.
145
+ *
146
+ * Returns `{ containerId, pulledImage }` — `pulledImage: true` indicates the
147
+ * function had to fall back to `docker pull alpine/socat:latest` (issue #964
148
+ * — preflight in `olam services up` should normally have already pulled it).
149
+ *
87
150
  * @param {string} worldId
88
151
  * @param {number} containerPort
89
152
  * @param {number} hostPort
90
- * @returns {Promise<string>} containerId
153
+ * @returns {Promise<{containerId: string, pulledImage: boolean}>}
91
154
  */
92
155
  async function createBridgeContainer(worldId, containerPort, hostPort) {
93
156
  const name = bridgeContainerName(worldId, containerPort);
@@ -111,11 +174,28 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
111
174
  'TCP-LISTEN:' + containerPort + ',fork,reuseaddr',
112
175
  'TCP:' + devboxName + ':' + containerPort,
113
176
  ];
114
- const result = spawnSync('docker', args, { encoding: 'utf-8', timeout: 10000 });
177
+ let result = spawnSync('docker', args, { encoding: 'utf-8', timeout: 10000 });
178
+ let pulledImage = false;
179
+
180
+ // Issue #964 fallback: if docker run failed because the image is missing,
181
+ // pull it and retry once. This covers hosts where `olam services up`
182
+ // didn't run the preflight (e.g. fresh Hazel install, docker restart
183
+ // pruned the image, etc.).
184
+ if (result.status !== 0 && isImageMissingError(result.stderr ?? '')) {
185
+ const pull = pullSocatViaCli();
186
+ if (!pull.ok) {
187
+ throw new Error(
188
+ `alpine/socat image missing and pull failed: ${pull.stderr || 'unknown error'}`,
189
+ );
190
+ }
191
+ pulledImage = true;
192
+ result = spawnSync('docker', args, { encoding: 'utf-8', timeout: 10000 });
193
+ }
194
+
115
195
  if (result.status !== 0) {
116
196
  throw new Error(result.stderr?.trim() || 'docker run failed');
117
197
  }
118
- return result.stdout.trim(); // container ID
198
+ return { containerId: result.stdout.trim(), pulledImage };
119
199
  }
120
200
 
121
201
  // container mode: Docker HTTP API
@@ -135,7 +215,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
135
215
  },
136
216
  };
137
217
 
138
- const createResp = await fetch(
218
+ const doCreate = () => fetch(
139
219
  `${apiBase}/containers/create?name=${encodeURIComponent(name)}`,
140
220
  {
141
221
  method: 'POST',
@@ -145,6 +225,28 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
145
225
  },
146
226
  );
147
227
 
228
+ let createResp = await doCreate();
229
+ let pulledImage = false;
230
+
231
+ // Issue #964 fallback for HTTP API path. Docker returns 404 with a body
232
+ // like {"message":"No such image: alpine/socat:latest"} when the image
233
+ // is missing.
234
+ if (!createResp.ok && createResp.status === 404) {
235
+ const body = await createResp.text().catch(() => '');
236
+ if (isImageMissingError(body)) {
237
+ const pull = await pullSocatViaHttpApi(apiBase);
238
+ if (!pull.ok) {
239
+ throw new Error(
240
+ `alpine/socat image missing and pull failed: ${pull.stderr || 'unknown error'}`,
241
+ );
242
+ }
243
+ pulledImage = true;
244
+ createResp = await doCreate();
245
+ } else {
246
+ throw new Error(`container create failed: 404 ${body}`);
247
+ }
248
+ }
249
+
148
250
  if (!createResp.ok) {
149
251
  const body = await createResp.text().catch(() => '');
150
252
  // If container already exists (409), try to get its ID
@@ -155,7 +257,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
155
257
  );
156
258
  if (inspectResp.ok) {
157
259
  const info = await inspectResp.json();
158
- return info.Id;
260
+ return { containerId: info.Id, pulledImage };
159
261
  }
160
262
  }
161
263
  throw new Error(`container create failed: ${createResp.status} ${body}`);
@@ -171,7 +273,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
171
273
  throw new Error(`container start failed: ${startResp.status}`);
172
274
  }
173
275
 
174
- return containerId;
276
+ return { containerId, pulledImage };
175
277
  }
176
278
 
177
279
  async function removeBridgeContainer(containerName, containerId) {
@@ -196,7 +298,7 @@ async function removeBridgeContainer(containerName, containerId) {
196
298
  *
197
299
  * @param {string} worldId
198
300
  * @param {number} containerPort
199
- * @returns {Promise<{hostPort: number, containerPort: number, url: string, containerId: string}>}
301
+ * @returns {Promise<{hostPort: number, containerPort: number, url: string, containerId: string, pulledImage?: boolean}>}
200
302
  */
201
303
  export async function exposePort(worldId, containerPort) {
202
304
  if (INFRA_PORTS.has(containerPort)) {
@@ -220,18 +322,22 @@ export async function exposePort(worldId, containerPort) {
220
322
  }
221
323
 
222
324
  const containerName = bridgeContainerName(worldId, containerPort);
223
- const containerId = await createBridgeContainer(worldId, containerPort, hostPort);
325
+ const { containerId, pulledImage } = await createBridgeContainer(worldId, containerPort, hostPort);
224
326
 
225
327
  const entry = { worldId, containerPort, hostPort, containerId, containerName };
226
328
  registry.set(key, entry);
227
329
  saveState();
228
330
 
229
- return {
331
+ const result = {
230
332
  hostPort,
231
333
  containerPort,
232
334
  url: `http://${HOST_IP}:${hostPort}`,
233
335
  containerId,
234
336
  };
337
+ // Only attach pulledImage when true so existing callers/tests don't see
338
+ // an unexpected key when the preflight succeeded.
339
+ if (pulledImage) result.pulledImage = true;
340
+ return result;
235
341
  }
236
342
 
237
343
  /**
@@ -41,7 +41,11 @@ import {
41
41
  WorldStartupFailureKind,
42
42
  } from '../lifecycle/index.mjs';
43
43
  import { createHostStream, newStreamId } from './host-stream.mjs';
44
- import { createNdjsonSpanSink } from '../observability/ndjson-span-sink.mjs';
44
+ import {
45
+ createNdjsonSpanSink,
46
+ attachBetaResponseEvents,
47
+ } from '../observability/ndjson-span-sink.mjs';
48
+ import { betaResponseEmitter } from '@olam/auth-client';
45
49
  import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
46
50
  import { detectHaltChunk } from './halt-detect.mjs';
47
51
  import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
@@ -72,6 +76,11 @@ import { readSecret as readPlanChatSecret, SECRET_PATH as PLAN_CHAT_SECRET_PATH
72
76
  import { createPrMergePoller } from './pr-merge-poller.mjs';
73
77
  import { parse as parseYaml } from 'yaml';
74
78
  import { startWorldsDbReconciler } from './worlds-db-source.mjs';
79
+ import {
80
+ reconcileWorldsWithDocker,
81
+ defaultListContainerNames,
82
+ } from './boot-reconciler.mjs';
83
+ import { startWorldActivityTracker } from './world-activity-tracker.mjs';
75
84
  import { authSecretHint } from './auth-secret-hint.mjs';
76
85
  import * as tunnelManager from './world-tunnel-manager.mjs';
77
86
  import * as bridgeManager from './port-bridge-manager.mjs';
@@ -83,6 +92,7 @@ import {
83
92
  } from './routes/process-port.mjs';
84
93
  import { instrumentHandler, renderMetrics } from './metrics.mjs';
85
94
  import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
95
+ import { emitTierSuggestion } from '../dispatch/auto-tier-scheduler.mjs';
86
96
 
87
97
  // ── Deployment-mode detection ─────────────────────────────────────
88
98
  //
@@ -490,6 +500,20 @@ const ndjsonSpanSink = await createNdjsonSpanSink({ hostStream }).catch((err) =>
490
500
  return null;
491
501
  });
492
502
 
503
+ // Wire @olam/auth-client `beta-response` events (Anthropic SDK 0.96+ beta
504
+ // flags — thinking-token-count, cache-diagnostics, future passthrough) into
505
+ // the NDJSON trace as `withCredential.beta-response` spans. Opt-in via the
506
+ // caller's `withCredential('claude', fn, { betas: [...] })` options; when
507
+ // no caller opts in, the emitter never fires and this subscription is a
508
+ // no-op. See docs/decisions/047-anthropic-sdk-beta-flags.md.
509
+ if (ndjsonSpanSink) {
510
+ try {
511
+ attachBetaResponseEvents({ sink: ndjsonSpanSink, emitter: betaResponseEmitter });
512
+ } catch (err) {
513
+ console.warn(`[trace] beta-response wire unavailable: ${err?.message ?? err}`);
514
+ }
515
+ }
516
+
493
517
  // A4: coalesce docker-event bursts into a single servers.snapshot. World
494
518
  // boot fires `create` + `start` + healthcheck transitions in <100ms; we
495
519
  // don't want a broadcast storm. Window matches plan-source.md P3 target.
@@ -922,6 +946,58 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
922
946
  if (handled) return;
923
947
  }
924
948
 
949
+ // /api/telemetry/planning-sessions — B9: aggregate planning_sessions by
950
+ // session_source for the canonical-surface bet's adoption signal. Per
951
+ // plan-chat-spa-canonical-surface plan § Operator workflow seam falsification
952
+ // trigger: if plan-chat-spa weekly-active sessions < 60% of control-plane/app
953
+ // by 2026-Q3, freeze plan-chat-spa feature work. This endpoint is the
954
+ // data source for that measurement.
955
+ //
956
+ // Query param: ?since=YYYY-MM-DD (required; rejects with 400 otherwise).
957
+ // Response: { plan_chat_spa: N, control_plane_app: M, unknown: K, ratio: pct }
958
+ // where ratio = plan_chat_spa / (plan_chat_spa + control_plane_app) * 100,
959
+ // null if denominator is 0.
960
+ if (url.pathname === '/api/telemetry/planning-sessions' && req.method === 'GET') {
961
+ const since = url.searchParams.get('since');
962
+ if (!since || !/^\d{4}-\d{2}-\d{2}$/.test(since)) {
963
+ res.writeHead(400, { 'Content-Type': 'application/json' });
964
+ return res.end(JSON.stringify({
965
+ error: 'bad_request',
966
+ message: 'Missing or malformed `since` query param. Expected YYYY-MM-DD.',
967
+ }));
968
+ }
969
+ // B9 ships the endpoint CONTRACT + the session_source schema column.
970
+ // The query implementation goes through plan-chat-service.mjs (which
971
+ // owns the pg pool); this host-cp handler currently emits a 503 with
972
+ // a structured "not_implemented" marker so callers can verify the
973
+ // endpoint shape + auth + query-param parsing without the data path.
974
+ //
975
+ // Phase G of this epic adds the plan-chat-service handler that this
976
+ // endpoint will proxy to. Until then operators can run the SQL
977
+ // directly:
978
+ // SELECT COALESCE(session_source, 'unknown'), COUNT(*)
979
+ // FROM planning_sessions
980
+ // WHERE created_at >= $since
981
+ // GROUP BY 1;
982
+ //
983
+ // Notify-C: ship contract + schema; defer data path to Phase G.
984
+ res.writeHead(503, { 'Content-Type': 'application/json' });
985
+ return res.end(JSON.stringify({
986
+ error: 'not_implemented',
987
+ message: 'B9 ships the endpoint contract + session_source schema column. ' +
988
+ 'Aggregation handler scaffolded in plan-chat-service.mjs lands in Phase G.',
989
+ since,
990
+ contractShape: {
991
+ plan_chat_spa: 0,
992
+ control_plane_app: 0,
993
+ unknown: 0,
994
+ ratio: null,
995
+ since: '<YYYY-MM-DD>',
996
+ asOf: '<ISO 8601>',
997
+ },
998
+ }));
999
+ }
1000
+
925
1001
  // /api/version/status: returns the current version snapshot (baked SHA
926
1002
  // vs operator's local HEAD). No auth required beyond the existing gate
927
1003
  // (already applied above). Phase 1 only — detection, no auto-upgrade.
@@ -2224,6 +2300,23 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2224
2300
  return jsonReply(res, 400, { error: 'invalid_json', message: err.message });
2225
2301
  }
2226
2302
  try {
2303
+ // Auto-tier-scheduler v1 (ADR 042): emit an informational
2304
+ // `dispatch.tier-suggestion` event BEFORE handing off to the
2305
+ // dispatch handler. Pure-informational — never changes which
2306
+ // provider actually runs. The dispatch payload's optional
2307
+ // `tierSpec` ({ kind?, expectedDurationMs?, explicitTier? })
2308
+ // carries the shape; absent it, the heuristic falls through to
2309
+ // its default (`cloudflare-sandbox`).
2310
+ if (dispatch && typeof dispatch.worldId === 'string') {
2311
+ try {
2312
+ emitTierSuggestion({
2313
+ worldId: dispatch.worldId,
2314
+ dispatchSpec: dispatch.tierSpec ?? {},
2315
+ currentTier: null,
2316
+ hostStream,
2317
+ });
2318
+ } catch { /* never let a hint surface break dispatch */ }
2319
+ }
2227
2320
  const result = await handleDispatchFromEmail({
2228
2321
  dispatch,
2229
2322
  worlds: WORLDS,
@@ -3182,6 +3275,16 @@ startWorldsSnapshotLoop();
3182
3275
  startTunnelsSnapshotLoop();
3183
3276
  startListeningSnapshotLoop();
3184
3277
 
3278
+ // Closes #965: live thought_count + total_cost_usd updates from each
3279
+ // active world's Claude session JSONL. Periodic (60s default) so Rico's
3280
+ // scheduling loop can read fresh values from the `worlds` table and
3281
+ // SPAs can subscribe to the `world.activity.tick` event. Fail-soft per
3282
+ // world: missing/malformed JSONL never crashes the loop.
3283
+ const worldActivityTracker = startWorldActivityTracker({
3284
+ dbPath: WORLDS_DB_PATH,
3285
+ broadcaster: hostStream,
3286
+ });
3287
+
3185
3288
  // ── Phase 1a / B1 (PR3): engine-select + await-before-listen ─────
3186
3289
  //
3187
3290
  // Decision 15: the async KubernetesEngine factory MUST be fully awaited
@@ -3208,6 +3311,22 @@ const hostCpEngine = await (async () => {
3208
3311
  return createDockerEngine({ dockerHost: DOCKER_HOST });
3209
3312
  })();
3210
3313
 
3314
+ // ── Boot-time worlds.db ↔ docker reconciler (issue #963) ─────────────
3315
+ //
3316
+ // One-shot pass: if a container is alive but worlds.db has no row, insert
3317
+ // a status='reconciled' row so host-cp can see it. If worlds.db says a
3318
+ // world is running/active but the container is gone, mark it 'orphaned'.
3319
+ // Fail-soft: docker unreachable or DB unavailable → log + continue boot.
3320
+ // Runs BEFORE server.listen() so the first request sees reconciled state.
3321
+ try {
3322
+ await reconcileWorldsWithDocker({
3323
+ dbPath: WORLDS_DB_PATH,
3324
+ listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
3325
+ });
3326
+ } catch (err) {
3327
+ console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
3328
+ }
3329
+
3211
3330
  server.listen(PORT, '0.0.0.0', () => {
3212
3331
  console.log(`olam-host-cp B3 listening on :${PORT}`);
3213
3332
  console.log(` DOCKER_HOST=${DOCKER_HOST}`);
@@ -3248,6 +3367,7 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
3248
3367
  stopWorldsSnapshotLoop();
3249
3368
  stopTunnelsSnapshotLoop();
3250
3369
  stopListeningSnapshotLoop();
3370
+ worldActivityTracker.stop();
3251
3371
  if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
3252
3372
  hostStream.close();
3253
3373
  if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});