@pleri/olam-cli 0.1.169 → 0.1.170

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +38 -0
  2. package/dist/commands/auth-status.d.ts +1 -0
  3. package/dist/commands/auth-status.d.ts.map +1 -1
  4. package/dist/commands/auth-status.js +45 -4
  5. package/dist/commands/auth-status.js.map +1 -1
  6. package/dist/commands/create.d.ts.map +1 -1
  7. package/dist/commands/create.js +26 -0
  8. package/dist/commands/create.js.map +1 -1
  9. package/dist/commands/enter.d.ts.map +1 -1
  10. package/dist/commands/enter.js +5 -0
  11. package/dist/commands/enter.js.map +1 -1
  12. package/dist/commands/resume.d.ts +63 -0
  13. package/dist/commands/resume.d.ts.map +1 -0
  14. package/dist/commands/resume.js +174 -0
  15. package/dist/commands/resume.js.map +1 -0
  16. package/dist/commands/setup.d.ts +19 -0
  17. package/dist/commands/setup.d.ts.map +1 -1
  18. package/dist/commands/setup.js +157 -19
  19. package/dist/commands/setup.js.map +1 -1
  20. package/dist/image-digests.json +8 -8
  21. package/dist/index.js +1021 -576
  22. package/dist/index.js.map +1 -1
  23. package/dist/lib/health-probes.d.ts +28 -0
  24. package/dist/lib/health-probes.d.ts.map +1 -1
  25. package/dist/lib/health-probes.js +75 -0
  26. package/dist/lib/health-probes.js.map +1 -1
  27. package/dist/lib/k8s-context-discovery.d.ts +80 -0
  28. package/dist/lib/k8s-context-discovery.d.ts.map +1 -0
  29. package/dist/lib/k8s-context-discovery.js +102 -0
  30. package/dist/lib/k8s-context-discovery.js.map +1 -0
  31. package/dist/mcp-server.js +1273 -771
  32. package/dist/spawn/home-override.d.ts +82 -0
  33. package/dist/spawn/home-override.d.ts.map +1 -0
  34. package/dist/spawn/home-override.js +107 -0
  35. package/dist/spawn/home-override.js.map +1 -0
  36. package/hermes-bundle/version.json +1 -1
  37. package/host-cp/k8s/manifests/30-configmap.yaml +5 -0
  38. package/host-cp/k8s/manifests/50-deployment.yaml +9 -2
  39. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  40. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  41. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  42. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  43. package/host-cp/lifecycle/classify.mjs +110 -0
  44. package/host-cp/lifecycle/emit.mjs +119 -0
  45. package/host-cp/lifecycle/evidence.mjs +45 -0
  46. package/host-cp/lifecycle/failure-kinds.mjs +56 -0
  47. package/host-cp/lifecycle/index.mjs +22 -0
  48. package/host-cp/lifecycle/phases.mjs +52 -0
  49. package/host-cp/observability/grafana-port-forward.sh +1 -1
  50. package/host-cp/observability/kyverno-cardinality-mutate.sh +2 -2
  51. package/host-cp/observability/loki-ingest.sh +1 -1
  52. package/host-cp/observability/ndjson-span-sink.mjs +131 -0
  53. package/host-cp/observability/prom-no-double-grafana.sh +4 -4
  54. package/host-cp/observability/redactor.mjs +72 -0
  55. package/host-cp/recovery/engine.mjs +148 -0
  56. package/host-cp/recovery/index.mjs +16 -0
  57. package/host-cp/recovery/ledger.mjs +105 -0
  58. package/host-cp/recovery/recipes.mjs +46 -0
  59. package/host-cp/recovery/scenarios.mjs +124 -0
  60. package/host-cp/recovery/step-runners.mjs +263 -0
  61. package/host-cp/src/docker-events.mjs +30 -6
  62. package/host-cp/src/pr-nanny.mjs +55 -3
  63. package/host-cp/src/server.mjs +173 -0
  64. package/package.json +1 -1
@@ -34,7 +34,15 @@ import { computeProgress } from './world-progress.mjs';
34
34
  import { createPrCache } from './pr-cache.mjs';
35
35
  import { fetchContainerSecret } from './container-secret-fetcher.mjs';
36
36
  import { subscribeDockerEvents } from './docker-events.mjs';
37
+ import {
38
+ recordWorldLifecycle,
39
+ emptyEvidence,
40
+ WorldLifecyclePhase,
41
+ WorldStartupFailureKind,
42
+ } from '../lifecycle/index.mjs';
37
43
  import { createHostStream, newStreamId } from './host-stream.mjs';
44
+ import { createNdjsonSpanSink } from '../observability/ndjson-span-sink.mjs';
45
+ import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
38
46
  import { detectHaltChunk } from './halt-detect.mjs';
39
47
  import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
40
48
  import { parseProxyPath, perWorldBase, proxyToWorld } from './proxy.mjs';
@@ -74,6 +82,7 @@ import {
74
82
  handleServerBridges,
75
83
  } from './routes/process-port.mjs';
76
84
  import { instrumentHandler, renderMetrics } from './metrics.mjs';
85
+ import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
77
86
 
78
87
  // ── Deployment-mode detection ─────────────────────────────────────
79
88
  //
@@ -142,6 +151,20 @@ const OLAM_REPO_HOST_PATH = process.env.OLAM_REPO_HOST_PATH ?? '';
142
151
  const OLAM_GH_CONFIG_HOST_PATH = process.env.OLAM_GH_CONFIG_HOST_PATH ?? '';
143
152
  const OLAM_UPGRADER_IMAGE = process.env.OLAM_UPGRADER_IMAGE ?? 'ghcr.io/pleri/olam-host-cp:latest';
144
153
  const WORKSPACES_DIR = process.env.OLAM_WORKSPACES_DIR ?? '/data/workspaces';
154
+ // Email-trigger surface (PR feat/email-as-world-trigger). The signing
155
+ // secret is the operator-shared key with the CF Email Worker — see
156
+ // docs/architecture/email-as-trigger.md. The allowlist is enforced
157
+ // defense-in-depth: the worker rejects at SMTP-time so bounces reach
158
+ // senders; we re-check at HTTP-time so a misrouted direct POST cannot
159
+ // bypass it. Both empty → endpoint stays mis-configured and returns
160
+ // 500/403 (fail-closed).
161
+ const OLAM_EMAIL_SIGNING_SECRET = process.env.OLAM_EMAIL_SIGNING_SECRET ?? '';
162
+ const OLAM_EMAIL_ALLOWED_SENDERS = process.env.OLAM_EMAIL_ALLOWED_SENDERS ?? '';
163
+ const OLAM_EMAIL_ATTACHMENTS_ROOT =
164
+ process.env.OLAM_EMAIL_ATTACHMENTS_ROOT ??
165
+ (HOST_CP_MODE === 'container'
166
+ ? '/data/email-attachments'
167
+ : path.join(os.homedir(), '.olam', 'email-attachments'));
145
168
  const WORLD_NAMES_PATH =
146
169
  process.env.OLAM_WORLD_NAMES_PATH ??
147
170
  (HOST_CP_MODE === 'container'
@@ -458,6 +481,15 @@ const sseGate = new SseGate({ maxConcurrent: SSE_CAP });
458
481
  // poll-every-2s `useListeningServers` loop.
459
482
  const hostStream = createHostStream({ log: (m) => console.log(`[host-stream] ${m}`) });
460
483
 
484
+ // Zero-config NDJSON span sink. Subscribes to host-stream `event: span`
485
+ // broadcasts and appends to ~/.olam/logs/host.trace.ndjson (override via
486
+ // OLAM_TRACE_LOG_PATH). Fail-open: a sink-bootstrap error logs a warning
487
+ // and proceeds without tracing rather than blocking host-cp boot.
488
+ const ndjsonSpanSink = await createNdjsonSpanSink({ hostStream }).catch((err) => {
489
+ console.warn(`[trace] NDJSON span sink unavailable: ${err?.message ?? err}`);
490
+ return null;
491
+ });
492
+
461
493
  // A4: coalesce docker-event bursts into a single servers.snapshot. World
462
494
  // boot fires `create` + `start` + healthcheck transitions in <100ms; we
463
495
  // don't want a broadcast storm. Window matches plan-source.md P3 target.
@@ -485,6 +517,93 @@ const stopEvents = subscribeDockerEvents({
485
517
  // this callback is by construction an olam world.
486
518
  scheduleServersSnapshot();
487
519
  },
520
+ // Killshot #2 — emit typed world.lifecycle events alongside the cache
521
+ // invalidation. Docker actions map onto phases as follows:
522
+ // start | restart → Spawning (container boot kicked off)
523
+ // stop → Finished (clean operator-initiated stop)
524
+ // die | kill → Failed (involuntary exit; carries exit code +
525
+ // explicit or classifier-derived failureKind)
526
+ // The lifecycle module's classifier runs against a synthetic evidence
527
+ // bundle so the trace records *why* the bucket was chosen. TrustRequired,
528
+ // ReadyForPrompt, and Running emissions are not observable from
529
+ // host-cp's docker-events surface — those transitions happen inside
530
+ // container-cp and are wired in a follow-up (see ADR 033 § Open
531
+ // questions for the planned container-cp → host-cp emission seam).
532
+ onWorldLifecycleEvent: ({ worldId, action, exitCode }) => {
533
+ const now = Date.now();
534
+ if (action === 'start' || action === 'restart') {
535
+ recordWorldLifecycle(hostStream, {
536
+ worldId,
537
+ phase: WorldLifecyclePhase.Spawning,
538
+ at: now,
539
+ });
540
+ return;
541
+ }
542
+ if (action === 'stop') {
543
+ recordWorldLifecycle(hostStream, {
544
+ worldId,
545
+ phase: WorldLifecyclePhase.Finished,
546
+ at: now,
547
+ });
548
+ return;
549
+ }
550
+ if (action === 'die' || action === 'kill') {
551
+ const ev = emptyEvidence(worldId, now);
552
+ ev.lastPhase = WorldLifecyclePhase.Running;
553
+ ev.lastPhaseAt = now;
554
+ if (exitCode !== undefined) ev.processExitCode = exitCode;
555
+ // For involuntary exit with a code we know the bucket up front;
556
+ // skip the classifier inference and pass it through explicitly so
557
+ // the trace records the exact docker-derived signal.
558
+ const failureKind =
559
+ exitCode !== undefined ? WorldStartupFailureKind.ProviderProcessGone : undefined;
560
+ const lifecycleEvent = recordWorldLifecycle(hostStream, {
561
+ worldId,
562
+ phase: WorldLifecyclePhase.Failed,
563
+ at: now,
564
+ evidence: ev,
565
+ failureKind,
566
+ });
567
+
568
+
569
+ // Killshot #3 — bounded auto-recovery. Attempt once per
570
+ // (worldId, failureKind) pair; the engine enforces idempotency.
571
+ // Emit recovery.* events on the host-stream so the NDJSON trace
572
+ // sink captures the full attempt trail.
573
+ const resolvedKind = lifecycleEvent.failureKind ?? null;
574
+ const scenario = findScenarioForKind(resolvedKind);
575
+ if (scenario !== undefined) {
576
+ hostStream.broadcast('recovery.attempt-started', {
577
+ worldId,
578
+ scenario: scenario?.name ?? 'unmatched',
579
+ recipe: scenario?.recipe ?? null,
580
+ });
581
+ attemptRecovery(worldId, ev, resolvedKind)
582
+ .then((entry) => {
583
+ if (entry.outcome === 'escalated') {
584
+ hostStream.broadcast('recovery.escalated', {
585
+ worldId,
586
+ ledgerEntry: entry,
587
+ });
588
+ } else if (entry.outcome === 'success') {
589
+ hostStream.broadcast('recovery.attempt-succeeded', {
590
+ worldId,
591
+ ledgerEntry: entry,
592
+ });
593
+ } else {
594
+ hostStream.broadcast('recovery.attempt-failed', {
595
+ worldId,
596
+ ledgerEntry: entry,
597
+ });
598
+ }
599
+ })
600
+ .catch((err) => {
601
+ // Recovery engine always resolves — this path is a safety net.
602
+ console.error(`[recovery] unexpected engine rejection for ${worldId}: ${err?.message}`);
603
+ });
604
+ }
605
+ }
606
+ },
488
607
  });
489
608
 
490
609
  // Initial servers.snapshot so subscribers connecting before any docker
@@ -2070,6 +2189,59 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
2070
2189
  // B5's CLI uses).
2071
2190
  // When unset, returns 503 with a clear setup hint instead of failing
2072
2191
  // silently — operators wire when they're ready for cloud-mode dogfood.
2192
+ // POST /v1/dispatch-from-email — see docs/architecture/email-as-trigger.md.
2193
+ //
2194
+ // The CF Email Worker (packages/email-worker-cloudflare) HMAC-signs the
2195
+ // canonical payload (Decision 022) and POSTs it here. The host re-validates
2196
+ // the signature, re-checks the sender allowlist (defense in depth), persists
2197
+ // attachments under OLAM_EMAIL_ATTACHMENTS_ROOT/<worldId>/<timestampMs>/,
2198
+ // and either routes the dispatch to a known world or persists a
2199
+ // spawn-pending request for the MCP/CLI layer to drain.
2200
+ //
2201
+ // The body cap here is 30 MiB — 25 MiB attachment ceiling + 5 MiB margin
2202
+ // for the JSON envelope. Larger payloads are rejected at 413.
2203
+ if (url.pathname === '/v1/dispatch-from-email' && req.method === 'POST') {
2204
+ const chunks = [];
2205
+ let size = 0;
2206
+ const MAX_BODY = 30 * 1024 * 1024;
2207
+ let aborted = false;
2208
+ req.on('data', (chunk) => {
2209
+ size += chunk.length;
2210
+ if (size > MAX_BODY) {
2211
+ aborted = true;
2212
+ jsonReply(res, 413, { error: 'body_too_large', maxBytes: MAX_BODY });
2213
+ req.destroy();
2214
+ return;
2215
+ }
2216
+ chunks.push(chunk);
2217
+ });
2218
+ req.on('end', async () => {
2219
+ if (aborted) return;
2220
+ let dispatch;
2221
+ try {
2222
+ dispatch = JSON.parse(Buffer.concat(chunks).toString('utf8') || '{}');
2223
+ } catch (err) {
2224
+ return jsonReply(res, 400, { error: 'invalid_json', message: err.message });
2225
+ }
2226
+ try {
2227
+ const result = await handleDispatchFromEmail({
2228
+ dispatch,
2229
+ worlds: WORLDS,
2230
+ secret: OLAM_EMAIL_SIGNING_SECRET,
2231
+ attachmentsRoot: OLAM_EMAIL_ATTACHMENTS_ROOT,
2232
+ allowlist: OLAM_EMAIL_ALLOWED_SENDERS,
2233
+ });
2234
+ return jsonReply(res, result.status, result.body);
2235
+ } catch (err) {
2236
+ return jsonReply(res, 500, {
2237
+ error: 'dispatch_failed',
2238
+ message: err instanceof Error ? err.message : String(err),
2239
+ });
2240
+ }
2241
+ });
2242
+ return;
2243
+ }
2244
+
2073
2245
  if (url.pathname === '/api/cloud-dispatch' && req.method === 'POST') {
2074
2246
  const cloudUrl = process.env.OLAM_CLOUD_URL;
2075
2247
  const showcasePw = process.env.OLAM_SHOWCASE_PASSWORD;
@@ -3078,6 +3250,7 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
3078
3250
  stopListeningSnapshotLoop();
3079
3251
  if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
3080
3252
  hostStream.close();
3253
+ if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});
3081
3254
  clearInterval(versionPollTimer);
3082
3255
  cache.clear();
3083
3256
  server.close(() => process.exit(0));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pleri/olam-cli",
3
- "version": "0.1.169",
3
+ "version": "0.1.170",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "olam": "./bin/olam.cjs"