@pleri/olam-cli 0.1.182 → 0.1.185

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/dist/agent-stream/agent-sdk-to-chunks.js +44 -30
  2. package/dist/ask/checkout.d.ts +19 -0
  3. package/dist/ask/checkout.d.ts.map +1 -0
  4. package/dist/ask/checkout.js +40 -0
  5. package/dist/ask/checkout.js.map +1 -0
  6. package/dist/ask/knowledge-pack-builder.d.ts +72 -0
  7. package/dist/ask/knowledge-pack-builder.d.ts.map +1 -0
  8. package/dist/ask/knowledge-pack-builder.js +91 -0
  9. package/dist/ask/knowledge-pack-builder.js.map +1 -0
  10. package/dist/ask/knowledge-pack.generated.d.ts +8 -0
  11. package/dist/ask/knowledge-pack.generated.d.ts.map +1 -0
  12. package/dist/ask/knowledge-pack.generated.js +1947 -0
  13. package/dist/ask/knowledge-pack.generated.js.map +1 -0
  14. package/dist/ask/one-shot.d.ts +21 -0
  15. package/dist/ask/one-shot.d.ts.map +1 -0
  16. package/dist/ask/one-shot.js +50 -0
  17. package/dist/ask/one-shot.js.map +1 -0
  18. package/dist/ask/repl.d.ts +30 -0
  19. package/dist/ask/repl.d.ts.map +1 -0
  20. package/dist/ask/repl.js +109 -0
  21. package/dist/ask/repl.js.map +1 -0
  22. package/dist/ask/sdk-client.d.ts +87 -0
  23. package/dist/ask/sdk-client.d.ts.map +1 -0
  24. package/dist/ask/sdk-client.js +118 -0
  25. package/dist/ask/sdk-client.js.map +1 -0
  26. package/dist/ask/system-prompt.d.ts +30 -0
  27. package/dist/ask/system-prompt.d.ts.map +1 -0
  28. package/dist/ask/system-prompt.js +31 -0
  29. package/dist/ask/system-prompt.js.map +1 -0
  30. package/dist/commands/ask.d.ts +27 -0
  31. package/dist/commands/ask.d.ts.map +1 -0
  32. package/dist/commands/ask.js +63 -0
  33. package/dist/commands/ask.js.map +1 -0
  34. package/dist/commands/auth-list-json.d.ts +53 -0
  35. package/dist/commands/auth-list-json.d.ts.map +1 -0
  36. package/dist/commands/auth-list-json.js +47 -0
  37. package/dist/commands/auth-list-json.js.map +1 -0
  38. package/dist/commands/auth.d.ts.map +1 -1
  39. package/dist/commands/auth.js +13 -0
  40. package/dist/commands/auth.js.map +1 -1
  41. package/dist/commands/doctor.js +11 -11
  42. package/dist/commands/doctor.js.map +1 -1
  43. package/dist/commands/keys-list-json.d.ts +55 -0
  44. package/dist/commands/keys-list-json.d.ts.map +1 -0
  45. package/dist/commands/keys-list-json.js +54 -0
  46. package/dist/commands/keys-list-json.js.map +1 -0
  47. package/dist/commands/keys.d.ts.map +1 -1
  48. package/dist/commands/keys.js +6 -0
  49. package/dist/commands/keys.js.map +1 -1
  50. package/dist/commands/lanes-list-json.d.ts +69 -0
  51. package/dist/commands/lanes-list-json.d.ts.map +1 -0
  52. package/dist/commands/lanes-list-json.js +42 -0
  53. package/dist/commands/lanes-list-json.js.map +1 -0
  54. package/dist/commands/lanes.d.ts.map +1 -1
  55. package/dist/commands/lanes.js +18 -7
  56. package/dist/commands/lanes.js.map +1 -1
  57. package/dist/commands/plans-list-json.d.ts +77 -0
  58. package/dist/commands/plans-list-json.d.ts.map +1 -0
  59. package/dist/commands/plans-list-json.js +61 -0
  60. package/dist/commands/plans-list-json.js.map +1 -0
  61. package/dist/commands/plans.d.ts.map +1 -1
  62. package/dist/commands/plans.js +10 -0
  63. package/dist/commands/plans.js.map +1 -1
  64. package/dist/commands/repos-list-json.d.ts +58 -0
  65. package/dist/commands/repos-list-json.d.ts.map +1 -0
  66. package/dist/commands/repos-list-json.js +45 -0
  67. package/dist/commands/repos-list-json.js.map +1 -0
  68. package/dist/commands/repos.d.ts +1 -1
  69. package/dist/commands/repos.d.ts.map +1 -1
  70. package/dist/commands/repos.js +12 -2
  71. package/dist/commands/repos.js.map +1 -1
  72. package/dist/commands/services.d.ts +47 -1
  73. package/dist/commands/services.d.ts.map +1 -1
  74. package/dist/commands/services.js +59 -33
  75. package/dist/commands/services.js.map +1 -1
  76. package/dist/commands/skills.d.ts +27 -0
  77. package/dist/commands/skills.d.ts.map +1 -1
  78. package/dist/commands/skills.js +17 -2
  79. package/dist/commands/skills.js.map +1 -1
  80. package/dist/commands/workspace-list-json.d.ts +73 -0
  81. package/dist/commands/workspace-list-json.d.ts.map +1 -0
  82. package/dist/commands/workspace-list-json.js +59 -0
  83. package/dist/commands/workspace-list-json.js.map +1 -0
  84. package/dist/commands/workspace.d.ts.map +1 -1
  85. package/dist/commands/workspace.js +7 -1
  86. package/dist/commands/workspace.js.map +1 -1
  87. package/dist/image-digests.json +8 -8
  88. package/dist/index.js +3170 -580
  89. package/dist/index.js.map +1 -1
  90. package/dist/lib/k8s-bootstrap.d.ts.map +1 -1
  91. package/dist/lib/k8s-bootstrap.js +13 -1
  92. package/dist/lib/k8s-bootstrap.js.map +1 -1
  93. package/dist/lib/k8s-secret-render.d.ts +2 -0
  94. package/dist/lib/k8s-secret-render.d.ts.map +1 -1
  95. package/dist/lib/k8s-secret-render.js +27 -0
  96. package/dist/lib/k8s-secret-render.js.map +1 -1
  97. package/dist/lib/peripheral-registry.d.ts +1 -1
  98. package/dist/lib/peripheral-registry.d.ts.map +1 -1
  99. package/dist/lib/peripheral-registry.js +13 -0
  100. package/dist/lib/peripheral-registry.js.map +1 -1
  101. package/dist/lib/upgrade-kubernetes.d.ts +6 -0
  102. package/dist/lib/upgrade-kubernetes.d.ts.map +1 -1
  103. package/dist/lib/upgrade-kubernetes.js +7 -1
  104. package/dist/lib/upgrade-kubernetes.js.map +1 -1
  105. package/dist/mcp-server.js +1167 -37
  106. package/hermes-bundle/version.json +1 -1
  107. package/host-cp/k8s/manifests/30-configmap.yaml +11 -6
  108. package/host-cp/k8s/manifests/50-deployment.yaml +15 -1
  109. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  110. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  111. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  112. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  113. package/host-cp/k8s/templates/chunks-postgres-secret-template.yaml +24 -0
  114. package/host-cp/k8s/templates/plan-chat-service-secret-template.yaml +35 -0
  115. package/host-cp/src/plan-chat-service.mjs +99 -74
  116. package/host-cp/src/server.mjs +141 -5
  117. package/package.json +4 -2
@@ -0,0 +1,1947 @@
1
+ /**
2
+ * AUTO-GENERATED by scripts/gen-knowledge-pack.mjs — DO NOT EDIT BY HAND.
3
+ *
4
+ * The bundled olam knowledge pack used as the `olam ask` SDK system prompt.
5
+ * Regenerate with: npm run gen:knowledge-pack --workspace=@pleri/olam-cli
6
+ */
7
+ /* eslint-disable */
8
+ export const KNOWLEDGE_PACK = `# Olam knowledge pack
9
+
10
+ The sections below are curated excerpts from the olam repository's own
11
+ documentation, bundled into the CLI at build time. Treat them as the
12
+ authoritative source for olam usage, setup, and CLI behaviour. When a section
13
+ conflicts with your prior knowledge, the section wins.
14
+
15
+ ---
16
+
17
+ ## Olam — README (overview, substrates, scope)
18
+
19
+ Source: \`README.md\`
20
+
21
+ # Olam
22
+
23
+ **The thought is the artifact. Code is a side effect.**
24
+
25
+ Olam provisions disposable development worlds from a single "seed of
26
+ thought" and drives them toward a reviewed pull request. Every world
27
+ is isolated, vault-authenticated, and auditable end-to-end — from the
28
+ initial prompt to the opened PR's URL.
29
+
30
+ The name comes from the Hebrew word for "world."
31
+
32
+ ## Two substrates, one paradigm
33
+
34
+ Olam runs on either of two substrates — each optimized for a different
35
+ operator role. Same dashboard SPA and CLI; the API surface overlaps on
36
+ the core world-lifecycle paths but diverges significantly on operator tooling.
37
+
38
+ | | **Cloudflare** (\`packages/cloudflare-worker/\`) | **Local docker** (\`packages/host-cp/\`) |
39
+ |---|---|---|
40
+ | Worlds | CF Sandbox (per-world Durable Object + Container) | Docker container on the host |
41
+ | Auth | Worker OAuth token exchange, tokens in KV \`OLAM_CREDS\` | Long-lived local auth container at \`:9999\`, tokens in a docker volume |
42
+ | Storage | KV + R2 | Host filesystem under \`~/.olam/\` |
43
+ | Optimized for | **Published-ship**: worlds run autonomously; zero-install contributors; shared team deployments | **Operator-facing**: tight iteration, dogfood, credential vault management, Docker-level introspection |
44
+ | Operator tooling | World lifecycle + OAuth only | Full surface: process inspection, port bridging, tunnel management, planner, credential CRUD, \`gh\`-backed PR listing |
45
+
46
+ **CF is the published-ship substrate.** Once a Worker is deployed, worlds
47
+ run autonomously inside CF Containers. The operator's interactive workflow
48
+ stays on host-cp; CF does not expose host-level APIs (Docker socket, local
49
+ filesystem, \`gh\` CLI) because it has none.
50
+
51
+ **host-cp is the operator-facing substrate.** It exposes the full feature
52
+ surface including Docker orchestration, per-world process inspection, port
53
+ bridging, tunnel management, planning subsystem, and credential vault CRUD.
54
+ Parity with CF is a per-feature design decision, not a contract obligation
55
+ (see [ADR-011](docs/decisions/011-two-substrate-parity.md)).
56
+
57
+ Tokens, workspace configs, and skill bundles live **outside any world**
58
+ on both substrates. Destroying a world never destroys any of them.
59
+
60
+ ## Three world-runner tiers
61
+
62
+ Orthogonal to the deploy substrate above, Olam ships three world-runner
63
+ tiers — each tuned for a different runtime-cost shape. Pick by
64
+ cold-start tolerance and task shape:
65
+
66
+ | Tier | Cold start | Use for |
67
+ |---|---|---|
68
+ | \`docker\` | 5–15 s | Heavyweight Claude Agent SDK loops, multi-step coding tasks, anything that needs a full devbox. **Default for \`/goal\`-style world work.** |
69
+ | \`cloudflare-sandbox\` | 2–4 s | Per-task containerised isolation on Cloudflare's edge — same agent capabilities as docker without a host Docker daemon. Use when host Docker is unavailable or for multi-op fan-out. |
70
+ | \`cloudflare-isolate\` | single-digit ms | One-shot RPC tasks: doc lookups, deterministic transforms, keystroke-scale work that doesn't deserve the boot tax. No agent loop, no persistent state, no container. v1 implements \`lookup\` only ([ADR 022](docs/decisions/022-v8-isolate-runtime-tier.md)). |
71
+
72
+ Rule of thumb: if the task is "receive input → call one API or run one
73
+ function → return result" and finishes in <1 s of real work, it belongs
74
+ on the isolate tier. If it needs an agent loop, tool use, or persistent
75
+ file state, it belongs on \`docker\` or \`cloudflare-sandbox\`. The
76
+ per-workspace default is \`compute.default\` in \`.olam/config.yaml\`.
77
+
78
+ ## Scope
79
+
80
+ Olam is one monorepo, six surfaces:
81
+
82
+ | Surface | Role |
83
+ |---------|------|
84
+ | **CF Worker** (\`packages/cloudflare-worker/\`) | Edge router, OAuth token exchange, per-world Durable Object, vault (KV + R2), completion-event sink |
85
+ | **Local auth service** (\`packages/auth-service/\`) | Long-lived Linux container running the same OAuth PKCE dance as the Worker; serves tokens to every local world over a shared-secret-authenticated API |
86
+ | **Docker provider** (\`packages/adapters/src/docker/\` + \`packages/control-plane/standalone/\`) | Local runtime: per-world devbox container, in-container control plane on \`:8080\`, credential refresh daemon, PR-gate hook, volume-mounted workspaces |
87
+ | **Sandbox container** (\`packages/cloudflare-worker/container_src/\`) | CF per-world Node 20 + zsh runtime; control plane on \`:8080\`; pre-baked Claude Code, Codex, \`gh\`, Linear/Slack MCPs, tmux, ttyd |
88
+ | **Operator SPA** (\`packages/plan-chat-spa/\`) | React 19 + Vite; the canonical operator SPA, served by **host-cp** locally and on GKE (Phase E5 atomic serving cutover, 2026-05). Reads authoritative state via \`/api/*\` + Electric chunk shapes; planning + non-planning surfaces (workspaces/repos/runbooks/inbox/world editor+events). The legacy \`packages/control-plane/app\` dashboard SPA is retired as host-cp's served bundle (still bundled by the CF Worker pending its own migration) |
89
+ | **MCP + plugin** (\`packages/mcp-server/\`, \`plugin/\`) | Claude Code plugin exposing \`olam_create\`, \`olam_dispatch\`, \`olam_auth_*\`, \`olam_pr_*\`, and friends so the agent can manage worlds without a separate CLI |
90
+
91
+ **What lives inside a world:** a git checkout of one or more repos, a
92
+ running Claude Code tmux session, scoped credentials injected from the
93
+ vault, and whatever artifacts / diffs the agent produces.
94
+
95
+ **What lives outside a world:** the user's identity + credentials
96
+ (KV), the workspace config (KV), the user's skill bundle (R2), and the
97
+ completion ladder's state record (DO). Destroying a world never
98
+ destroys any of these.
99
+
100
+ See [\`docs/architecture/\`](./docs/architecture/README.md) for the
101
+ full nine-part walkthrough.
102
+
103
+ **Operator-facing guide** to the fat-box runtime that ships warm-create:
104
+ [\`docs/guides/fatbox-runtime/\`](./docs/guides/fatbox-runtime/README.md)
105
+ — plain-English walkthrough with mermaid diagrams covering what
106
+ changed across PRs #394 / #398 / #402 / #404, getting started, and
107
+ troubleshooting. **Start here if you've never used \`olam create\`
108
+ before, or if your warm-create is taking the cold path and you don't
109
+ know why.**
110
+
111
+ **For new orgs onboarding to olam**, the **3-contract pattern** is the
112
+ authoring surface. Read these in order before publishing your first
113
+ devbox image:
114
+
115
+ 1. [\`devbox-contract.md\`](./docs/architecture/devbox-contract.md) — what
116
+ the devbox image must provide so olam + host-cp can drive it.
117
+ 2. [\`manifest-spec.md\`](./docs/architecture/manifest-spec.md) — per-repo
118
+ \`.adb.yaml\` / \`.olam.yaml\` schema, including the
119
+ \`bootstrap[].produces\` annotation that drives Phase 1 warm-create
120
+ sentinel-handoff.
121
+ 3. [\`config-spec.md\`](./docs/architecture/config-spec.md) — workspace-
122
+ level \`.olam/config.yaml\` schema, including the
123
+ \`devbox.registry\` provider/prefix block (ghcr / gar / dockerhub)
124
+ and \`image_selectors\` first-match-wins rules.
125
+
126
+ Adjacent runtime doc:
127
+ [\`snapshot-restore.md\`](./docs/architecture/snapshot-restore.md) —
128
+ the warm-create snapshot cache flow, fingerprint design, 5 UX strings,
129
+ and operator FAQ. Read this when you need to understand why
130
+ \`olam create\` sometimes runs the cold path despite a previous
131
+ warm-create on the same workspace.
132
+
133
+ Atlas-shape reference templates: [\`docs/templates/\`](./docs/templates/)
134
+ (\`devbox.atlas.Dockerfile\`, \`manifest.atlas.adb.yaml\`,
135
+ \`config.atlas.olam.yaml\`).
136
+
137
+ ---
138
+
139
+ ## How it works
140
+
141
+ \`\`\`mermaid
142
+ flowchart LR
143
+ User[User machine]
144
+ User --> Bootstrap["olam bootstrap"]
145
+ Bootstrap --> Smoke[docker info smoke]
146
+ Smoke --> Pulls[("Parallel pull by digest<br/>retry · throttle · coalesce")]
147
+ Pulls --> HostCp["ghcr.io/pleri/olam-host-cp"]
148
+ Pulls --> Auth["ghcr.io/pleri/olam-auth"]
149
+ Pulls --> Devbox["ghcr.io/pleri/olam-devbox"]
150
+ HostCp --> Handshake["protocol-version handshake<br/>(refuse on no-overlap)"]
151
+ Auth --> Handshake
152
+ Devbox --> Handshake
153
+ Handshake --> HostCpStart[olam host-cp start]
154
+ HostCpStart --> AuthUp[olam auth up]
155
+ AuthUp --> AuthLogin["olam auth login<br/>(PKCE; --skip-auth-login to bypass)"]
156
+ AuthLogin --> Create["olam create --task ..."]
157
+ Create --> World[("Devbox world<br/>+ in-world claude-main")]
158
+ World --> Ladder["Completion ladder<br/>draft → recommendations →<br/>adversarial_review → audit_passed →<br/>pr_eligible → pr_opened"]
159
+ \`\`\`
160
+
161
+ \`olam setup\` is the canonical fresh-host wizard (default substrate:
162
+ kubernetes/k3d; see [Quick start](#quick-start)). The diagram above
163
+ shows \`olam bootstrap\` — the **docker-compose** on-ramp it delegates to
164
+ on the docker path (and the direct entry point for CI / scripted
165
+ contexts). \`bootstrap\` fans out three GHCR pulls in parallel
166
+ (digest-pinned, single in-flight per ref, single bounded retry on
167
+ transient failure), verifies every pulled image's
168
+ \`olam.protocol.versions\` label overlaps the CLI's, then drives
169
+ \`host-cp start\` + \`auth up\` + \`auth login\` to a working stack. Exit
170
+ codes are explicit: \`3\` = pull failed, \`4\` = protocol mismatch.
171
+
172
+ ---
173
+
174
+ ## Quick start
175
+
176
+ \`\`\`bash
177
+ curl -fsSL https://olam.bar.dev/install | sh
178
+ olam setup
179
+ \`\`\`
180
+
181
+ That's it. The installer puts \`@pleri/olam-cli\` on your PATH (requires Node.js ≥ 20 and npm). \`olam setup\` installs k3d (if absent), creates a local Kubernetes cluster named \`olam-dev\`, and brings up the full peripheral stack (host-cp, auth-service, mcp-auth-service, kg-service, memory-service). Works on macOS and Linux. No source checkout required.
182
+
183
+ The setup wizard is **idempotent** — re-running skips steps that are already complete.
184
+
185
+ After setup, every world is one call:
186
+
187
+ \`\`\`bash
188
+ olam create --name my-world --task "audit the auth module for SSRF"
189
+ \`\`\`
190
+
191
+ Full setup guide (prereqs, observability, troubleshooting):
192
+ [\`docs/onboarding/k3s-mode-setup.md\`](./docs/onboarding/k3s-mode-setup.md).
193
+
194
+ ### Docker Compose (lighter alternative)
195
+
196
+ For hosts that can't run a Kubernetes cluster locally, or for CI:
197
+
198
+ \`\`\`bash
199
+ curl -fsSL https://olam.bar.dev/install | sh
200
+ olam setup --substrate=docker
201
+ \`\`\`
202
+
203
+ This runs three host containers (auth, mcp-auth, kg-service) via docker compose instead of a full cluster. Existing docker-compose installs are protected: \`~/.olam/config.json\` with \`host.substrate: 'compose'\` continues on docker with a migration hint.
204
+
205
+ Full setup guide for compose mode: [\`docs/onboarding/fresh-machine-setup.md\`](./docs/onboarding/fresh-machine-setup.md).
206
+
207
+ ---
208
+
209
+ ## Setup
210
+
211
+ ### Install the CLI
212
+
213
+ \`\`\`sh
214
+ curl -fsSL https://olam.bar.dev/install | sh
215
+ \`\`\`
216
+
217
+ The installer is POSIX-clean (works under \`dash\`, \`sh\`, or \`bash\`) and
218
+ installs from npmjs.org. It checks for **Node.js ≥ 20** (hard
219
+ requirement) and \`npm\`, then runs \`npm install -g @pleri/olam-cli\`.
220
+ Use \`OLAM_CHANNEL=canary\` to track the prerelease tag.
221
+
222
+ ### Bring up the stack
223
+
224
+ \`\`\`bash
225
+ olam setup # k3d cluster + full peripheral stack (default)
226
+ olam setup -y # non-interactive: auto-affirm every prompt
227
+ olam setup --substrate=docker # docker compose mode (3 containers, no cluster)
228
+ olam setup --cluster-name foo # use a different k3d cluster name (default: olam-dev)
229
+ \`\`\`
230
+
231
+ \`olam setup\` is the canonical fresh-host wizard. It's substrate-aware: the default is **kubernetes** (k3d), which installs k3d (via brew on macOS, else the upstream install script), creates the \`olam-dev\` cluster, applies all manifests, and verifies every deployment is \`1/1 Running\`. Pass \`-y\` to skip all prompts, or \`--substrate=docker\` to run the lighter compose path instead.
232
+
233
+ Artifacts land under \`~/.olam/\` on the host:
234
+
235
+ - \`~/.olam/auth-secret\` — shared secret (\`0600\`) generated on first
236
+ \`auth up\`; authenticates every world's requests to the auth-service.
237
+ - \`~/.olam/worlds/<world-id>/\` — per-world worktree + thought DB.
238
+ - \`~/.olam/worlds.db\` — world registry.
239
+ - \`~/.olam/upgrade.log\` — JSONL audit log of every \`olam upgrade\` invocation.
240
+
241
+ ### Legacy: \`olam bootstrap\`
242
+
243
+ \`olam bootstrap\` targets the **docker compose** substrate directly (it pulls three
244
+ digest-pinned GHCR images and starts them via compose). It remains the on-ramp for
245
+ the docker path in CI and scripted contexts:
246
+
247
+ \`\`\`bash
248
+ olam bootstrap # pull host-cp + auth + devbox by digest, start services, run auth login
249
+ olam bootstrap --with-smoke # also create a smoke-test world to verify end-to-end
250
+ olam bootstrap --skip-auth-login # CI / scripted use; equivalent to OLAM_BOOTSTRAP_SKIP_AUTH_LOGIN=1
251
+ olam bootstrap --registry ghcr.io/pleri # override the registry prefix
252
+ \`\`\`
253
+
254
+ After bootstrap completes, every subsequent world is one call:
255
+
256
+ \`\`\`bash
257
+ olam create --name my-world --task "audit the auth module for SSRF"
258
+ \`\`\`
259
+
260
+ ### Cloudflare deploy
261
+
262
+ Required for a shared team deployment. Needs a Cloudflare Workers Paid
263
+ plan (DO + Containers), \`wrangler\`, and a CF Access application gating
264
+ your Worker URL.
265
+
266
+ \`\`\`bash
267
+ cp .env.example .env.local
268
+ # Edit .env.local
269
+ \`\`\`
270
+
271
+ The canonical keys (see [\`docs/CF_WORLDS_SPEC.md\`](./docs/CF_WORLDS_SPEC.md) §3):
272
+
273
+ \`\`\`sh
274
+ # Deploy plane
275
+ CLOUDFLARE_API_TOKEN=<wrangler OAuth or API token with workers:write>
276
+
277
+ # CF Access admin (only needed when running scripts/setup-access.mjs)
278
+ CF_API_TOKEN=<token with Access: Apps and Policies:Edit + Service Tokens:Edit>
279
+
280
+ # MCP / CLI machine auth (CF Access service token + Pylon outbound auth).
281
+ # Worker auth migrated to Pylon scoped tokens in PR #31; see
282
+ # docs/migrations/mcp-pylon.md for the operator setup.
283
+ OLAM_WORKER_URL=https://<your-worker>.workers.dev
284
+ OLAM_CF_ACCESS_CLIENT_ID=<uuid>.access
285
+ OLAM_CF_ACCESS_CLIENT_SECRET=<long secret>
286
+ OLAM_PYLON_ORG_URL=https://pylon.<your-org>.dev
287
+ OLAM_PYLON_ORG_ID=<your-org-slug>
288
+ # PYLON_SESSION_TOKEN auto-resolved from macOS Keychain / Linux Secret
289
+ # Service after \`pylon login\`; only needed in CI / headless contexts.
290
+
291
+ # Container runtime policy (optional; default=bypass)
292
+ OLAM_CLAUDE_PERMISSION_MODE=bypass # or "accept-edits" / "strict"
293
+ \`\`\`
294
+
295
+ \`\`\`bash
296
+ cd packages/cloudflare-worker
297
+ pnpm wrangler deploy
298
+ \`\`\`
299
+
300
+ Wrangler builds the container image, pushes to CF's registry, and
301
+ binds the \`Sandbox\` Durable Object + \`OLAM_CREDS\` / \`OLAM_WORKSPACES\`
302
+ KV + \`OLAM_USER_PROFILES\` R2. Authenticate once with
303
+ \`cloudflared access login https://<your-worker>.workers.dev\` — every
304
+ subsequent world auto-injects from the vault.
305
+
306
+ ---
307
+
308
+ ### Claude Code plugin (both paths)
309
+
310
+ \`\`\`bash
311
+ claude plugin install ./plugin
312
+ \`\`\`
313
+
314
+ You now have \`/olam:create\`, \`/olam:dispatch\`, \`/olam:destroy\`,
315
+ \`/olam:list\`, \`/olam:enter\`, \`/olam:status\`, \`/olam:auth_*\`, and
316
+ \`/olam:pr_*\` available in Claude Code.
317
+
318
+ ---
319
+
320
+ ### MCP server (Claude Code integration)
321
+
322
+ The CLI bundles an MCP server that exposes \`olam_create\`,
323
+ \`olam_dispatch\`, \`olam_destroy\`, \`olam_list\`, \`olam_status\`,
324
+ \`olam_enter\`, \`olam_pr\`, \`olam_observe\`, \`olam_lane_*\`, and
325
+ \`olam_capture_view\` as direct tool calls for any MCP-aware agent
326
+ runtime. Three ways to wire it in:
327
+
328
+ **One command (recommended):**
329
+
330
+ \`\`\`bash
331
+ olam mcp install # default --scope=user
332
+ olam mcp install --scope=project # writes to project .mcp.json
333
+ olam mcp uninstall # idempotent; symmetric
334
+ \`\`\`
335
+
336
+ Auto-detects whether \`olam\` is on PATH. If so, writes
337
+ \`command: "olam"\` (no \`npx\` cold-start). Otherwise falls back to
338
+ \`command: "npx", args: ["-y", "@pleri/olam-cli", "mcp", "serve"]\`.
339
+
340
+ **Paste the JSON snippet** into \`~/.claude.json\` (user scope) or a
341
+ project's \`.mcp.json\`:
342
+
343
+ \`\`\`json
344
+ {
345
+ "mcpServers": {
346
+ "olam": {
347
+ "command": "npx",
348
+ "args": ["-y", "@pleri/olam-cli", "mcp", "serve"]
349
+ }
350
+ }
351
+ }
352
+ \`\`\`
353
+
354
+ **Marketplace alias** — \`olam --mcp\` is a thin alias for
355
+ \`olam mcp serve\`. Use when an MCP-marketplace snippet expects the
356
+ bare-flag convention (\`npx -y <pkg> --mcp\`).
357
+
358
+ Restart Claude Code; verify with \`claude mcp list\` (look for \`olam\`).
359
+ Full details + version-skew + cold-start trade-offs:
360
+ [\`docs/architecture/mcp-as-npx-served.md\`](docs/architecture/mcp-as-npx-served.md).
361
+
362
+ ---
363
+
364
+ ## Usage
365
+
366
+ ### Autonomous Build: seed → world → PR gate
367
+
368
+ In Claude Code:
369
+
370
+ \`\`\`
371
+ /olam:create "audit the auth module for SSRF vulnerabilities"
372
+ \`\`\`
373
+
374
+ That's it. The plugin calls the Worker, the Worker provisions a
375
+ sandbox, creds flow in from the vault, claude-main boots and
376
+ auto-dispatches the task. The dashboard URL prints in the tool output
377
+ (\`https://<your-worker>.workers.dev/sandbox/<uuid>/\`).
378
+
379
+ Open that URL and you'll see:
380
+
381
+ - The **seed of thought** pinned at the top (immutable subject).
382
+ - A **phase progress strip** — \`created → syncing → cloning →
383
+ configuring → warming → ready → task_running\`. Warming renders a
384
+ narration of the probe: *tmux session starting → claude is booting
385
+ · waiting for prompt → shell spawn check → ready*.
386
+ - The **completion ladder** — 6 steps from \`draft\` to \`pr_opened\`,
387
+ each lit when its named actor files the event (Claude Stop hook,
388
+ Codex reviewer, audit session, gh PR-open).
389
+ - A **terminal button** (top-right) that opens a full-screen ttyd
390
+ attached to \`claude-main\`.
391
+ - A **session-health bar** that shows a red banner with the warmup
392
+ trace if anything breaks during spawn.
393
+
394
+ Everything is a projection of the authoritative state on the DO. If
395
+ the container gets evicted mid-run, the dashboard's auto-resume hook
396
+ silently re-injects creds and re-spawns claude — no Auth modal, no
397
+ lost context.
398
+
399
+ ### Lifecycle commands
400
+
401
+ | Command | Effect |
402
+ |---------|--------|
403
+ | \`/olam:create <task>\` | Provision a world, dispatch the task, return dashboard URL |
404
+ | \`/olam:list\` | Recent worlds + phases |
405
+ | \`/olam:enter <world>\` | Pop the dashboard URL for an existing world |
406
+ | \`/olam:status <world>\` | Phase + completion state + session health |
407
+ | \`/olam:dispatch <world> <task>\` | Send another prompt into the existing tmux session |
408
+ | \`/olam:destroy <world>\` | Hard destroy (DO evict + container down) |
409
+
410
+ ### Self-upgrade — pull-by-digest from GHCR
411
+
412
+ \`olam upgrade\` defaults to **pull-by-digest from GHCR**. The CLI's
413
+ pinned image-digest set is round-tripped through \`/api/version/status\`
414
+ so success means "host-cp reports the new SHA". The legacy
415
+ build-from-source path lives behind \`--from-source\` and only works in
416
+ a monorepo checkout with \`OLAM_DEV=1\`.
417
+
418
+ \`\`\`bash
419
+ olam upgrade -y # default: pull pinned digests, retag, recreate host-cp + auth + devbox
420
+ olam upgrade --rollback # restore the prior canonical tag set from :olam-rollback
421
+ olam upgrade --force # allow swap even if HEAD has drifted from captured-at-pull SHA
422
+ olam upgrade --no-cache # DOCKER_BUILD_NO_CACHE=1 across all three builds (--from-source path)
423
+ olam upgrade --history -n 5 # print the last 5 rows of ~/.olam/upgrade.log
424
+ olam upgrade --history -n 20 --json # same, JSONL — pipeable to jq
425
+ olam upgrade --branch main -y # switch branches first (refuses on dirty tree)
426
+ olam upgrade --from-source -y # legacy: rebuild all three images from monorepo source (needs OLAM_DEV=1)
427
+ \`\`\`
428
+
429
+ The success criterion is an **atomic 6-tag swap**: the prior canonical
430
+ tags (\`olam-host-cp:latest\`, \`olam-auth:latest\`, \`olam-devbox:latest\`)
431
+ are preserved as \`:olam-rollback\`, then the new pulled images become
432
+ canonical. \`auth upgrade\` follows the same pattern. Every invocation
433
+ appends a JSONL row to \`~/.olam/upgrade.log\` (verdict, captured SHA,
434
+ elapsed, exit code) so a post-mortem is always one \`--history --json\`
435
+ away.
436
+
437
+ \`olam auth upgrade\` mirrors the same default: pull-by-digest unless
438
+ \`--from-source\` is specified.
439
+
440
+ **Release cadence (post \`actions-cost-reduce\` Phase A, 2026-05-13):**
441
+ Releases are now hand-cranked. Merges to \`main\` no longer auto-publish
442
+ new images. To ship a release:
443
+
444
+ \`\`\`bash
445
+ gh release create v0.1.NNN --target main --generate-notes --title "v0.1.NNN"
446
+ \`\`\`
447
+
448
+ See [\`docs/architecture/release-flow.md\`](./docs/architecture/release-flow.md)
449
+ for the full flow — what fires under the hood, the \`[skip ci]\` semantics,
450
+ the manual \`workflow_dispatch\` escape hatch, and how to revert.
451
+
452
+ See
453
+ [\`packages/cli/skills/olam-upgrade/SKILL.md\`](packages/cli/skills/olam-upgrade/SKILL.md)
454
+ for the full flag matrix, swap-boundary semantics, and recovery
455
+ playbook.
456
+
457
+ ### Programmatic \`/session/start\`
458
+
459
+ For callers that aren't Claude Code (CI, audits, scripted batches):
460
+
461
+ \`\`\`bash
462
+ curl -X POST "$OLAM_WORKER_URL/session/start" \\
463
+ -H "Content-Type: application/json" \\
464
+ -H "CF-Access-Client-Id: $OLAM_CF_ACCESS_CLIENT_ID" \\
465
+ -H "CF-Access-Client-Secret: $OLAM_CF_ACCESS_CLIENT_SECRET" \\
466
+ -d '{
467
+ "workspace": "ein-sof",
468
+ "task": "audit the auth module for SSRF vulnerabilities",
469
+ "engineerHash": "anonymous",
470
+ "repoUrl": "workspace://ein-sof",
471
+ "branch": "main",
472
+ "userEmail": "you@example.com"
473
+ }'
474
+ \`\`\`
475
+
476
+ The \`userEmail\` override keys the vault lookup when you're calling on
477
+ behalf of a known user via a service token.
478
+
479
+ ### Dashboard SPA dev run
480
+
481
+ The canonical SPA is \`@olam/plan-chat-spa\` — host-cp's sole served
482
+ bundle (the legacy \`packages/control-plane/app/\` is sunset). Run it
483
+ locally with Vite:
484
+
485
+ \`\`\`bash
486
+ npm run dev --workspace=@olam/plan-chat-spa # Vite dev server
487
+ npm run build --workspace=@olam/plan-chat-spa # tsc -b + vite build
488
+ \`\`\`
489
+
490
+ For the CF-edge dev loop (SPA assets bundled into the Worker via
491
+ \`wrangler.jsonc\`'s \`assets\` field; served directly through CF Access
492
+ SSO), run the worker:
493
+
494
+ \`\`\`bash
495
+ npm run dev --workspace=@olam/cloudflare-worker # wrangler dev
496
+ \`\`\`
497
+
498
+ ### PLERI is optional
499
+
500
+ \`PLERI_BASE_URL\` (and the corresponding \`pleri:\` block in
501
+ \`.olam/config.yaml\`) is an **optional team-intelligence integration**;
502
+ required only for \`olam crystallize\` and team-mode features that share
503
+ thought graphs to a Pleri Plane. Fresh installs without PLERI work end
504
+ to end — \`olam create\`, \`olam dispatch\`, \`olam list\`, \`olam destroy\`
505
+ all behave normally. \`olam crystallize\` on a non-PLERI machine emits a
506
+ one-line stderr warning and exits with named code 2 (distinguishable from
507
+ exit-0 success and exit-1 errors for piped scripts), and is hidden
508
+ from \`olam --help\` until PLERI is configured. See
509
+ [\`packages/cli/src/exit-codes.ts\`](packages/cli/src/exit-codes.ts) for
510
+ the full exit-code registry.
511
+
512
+ ### Tests
513
+
514
+ This repo uses **npm workspaces** (\`npm run <script> --workspace=<pkg>\`):
515
+
516
+ \`\`\`bash
517
+ # Container + intelligence pipeline
518
+ npm run test --workspace=@olam/cloudflare-worker
519
+ npm run test --workspace=@olam/intelligence
520
+
521
+ # Core + adapters
522
+ npm run test --workspace=@olam/core
523
+
524
+ # CLI (default sweep excludes the docker-integration suite — see CLAUDE.md)
525
+ npm test --workspace=@pleri/olam-cli
526
+ npm run test:cli:integration # the excluded docker-integration suite
527
+ \`\`\`
528
+
529
+ ---
530
+
531
+ ## Paradigms
532
+
533
+ Three ideas do most of the work. Every other design choice is
534
+ downstream of these.
535
+
536
+ ### Auth container
537
+
538
+ An Olam world should boot **already authenticated to Claude**, with no
539
+ browser popup, no keychain prompt, no \`docker cp\` race. The CF path
540
+ solves this with the Worker doing OAuth at the edge and KV storing
541
+ refresh tokens. The local path mirrors that shape: a long-lived Linux
542
+ **auth container** at \`:9999\` runs the same OAuth PKCE dance, stores
543
+ tokens in a named docker volume, and serves fresh access tokens over
544
+ an HTTP API secured by a host-generated shared secret (\`~/.olam/auth-secret\`,
545
+ \`0600\`).
546
+
547
+ Lifecycle:
548
+
549
+ \`\`\`
550
+ olam auth up # start the container (idempotent)
551
+ olam auth login # one-time PKCE; opens browser, paste code back
552
+ olam auth status # show container state + valid accounts
553
+ \`\`\`
554
+
555
+ When a world is created, the docker provider injects the shared secret
556
+ as \`OLAM_AUTH_SECRET\` into the devbox container. On boot, the in-world
557
+ \`entrypoint.sh\` runs \`fetch-creds.mjs\` which fetches a fresh access
558
+ token from \`http://host.docker.internal:9999/credentials\` and writes
559
+ \`~/.claude/.credentials.json\` atomically. A background loop refreshes
560
+ every six hours. The refresh token never leaves the auth container.
561
+
562
+ **Provenance, not ambient.** The token a world uses is provably the
563
+ token the auth container issued — same first-14 bytes, same
564
+ \`expiresAt\`, written after \`fetch-creds\` runs. \`gh\` tokens are a
565
+ separate concern (baked into the devbox image).
566
+
567
+ Full CF ↔ local parity lives in
568
+ [\`docs/architecture/\`](./docs/architecture/README.md) and
569
+ [\`packages/auth-service/\`](./packages/auth-service/).
570
+
571
+ ### KG-service container
572
+
573
+ For symbol-shaped queries ("who calls \`Cart#submit\`?", "where is
574
+ \`hydrateOrder\` defined?") agents should reach for a knowledge graph,
575
+ not \`grep\`. Olam ships a long-running **\`olam-kg-service\` container**
576
+ at \`127.0.0.1:9997\` that holds a Python HTTP server with bge-small
577
+ embeddings + a 4-layer classifier in memory. Operators install
578
+ nothing beyond Docker.
579
+
580
+ \`\`\`
581
+ olam services up # starts kg-service alongside auth + mcp-auth
582
+ olam kg classify "trace cart to order"
583
+ olam kg install-hook # writes a PreToolUse Bash hook into .claude/settings.json
584
+ olam kg doctor # 4-probe health check
585
+ \`\`\`
586
+
587
+ The hook fires on grep/find-shape Bash commands and emits
588
+ \`additionalContext\` when the classifier suggests the KG is a better
589
+ route than grep. It's fail-open: \`curl --max-time 1\` means a slow or
590
+ unreachable kg-service never blocks the agent.
591
+
592
+ **World parity.** Every world's \`entrypoint.sh\` installs the same hook
593
+ pointing at \`host.docker.internal:9997\` so devbox containers reach the
594
+ host's kg-service through Docker's host-gateway mapping. A single model
594
+ copy serves the host and every world.
596
+
597
+ **Why a container, not a host CLI?** Latency. Measured options:
598
+ \`docker run --rm\` per call ≈ 43 s (dead), \`docker exec\` fresh Python
599
+ ≈ 970 ms (too slow), persistent Python HTTP server in container ≈
600
+ 37 ms p50 (viable). The full reasoning is in
601
+ [ADR 017](./docs/decisions/017-kg-service-container.md).
602
+
603
+ Architecture: [\`docs/architecture/kg-service.md\`](./docs/architecture/kg-service.md)
604
+ (operator guide, HTTP contract, troubleshooting) +
605
+ [\`docs/architecture/kg-classifier.md\`](./docs/architecture/kg-classifier.md)
606
+ (4-layer internals + accuracy bench).
607
+
608
+ ### Autonomous Build
609
+
610
+ \`\`\`
611
+ olam create --name my-world --task "describe the work"
612
+ \`\`\`
613
+
614
+ That's the whole user-facing contract. The system builds autonomously
615
+ from the seed to the PR-gate checkpoint — past that point the
616
+ [PR gate](#pr-gate) holds for human or Codex approval before anything
617
+ lands on GitHub. "Autonomous" describes the control boundary honestly:
618
+ independent from seed through branch-push, gated at PR creation.
619
+
620
+ Everything between the single command and the gate is handled
621
+ internally:
622
+
623
+ 1. **Preflight** — verifies the auth container is up and has ≥1 valid
624
+ account; fails fast with a one-line remedy if not.
625
+ 2. **World provisioning** — worktree, env, service containers, devbox.
626
+ 3. **Credential injection** — via \`fetch-creds.mjs\` on container boot
627
+ (not host-side \`docker cp\`, which raced Claude's first read).
628
+ 4. **Auto-dispatch** — task lands in the in-world Claude session.
629
+ 5. **Background refresh** — token rotates every 6 h for the life of
630
+ the world.
631
+
632
+ One call from the user. Preflight + retries + refresh + atomic writes
633
+ live inside — per Codex's adversarial note, the client intent is
634
+ singular; the orchestration stays.
635
+
636
+ ### PR gate
637
+
638
+ "Autonomous" only holds up to the gate. Every Claude-driven
639
+ \`gh pr create\` inside a world is intercepted:
640
+
641
+ \`\`\`
642
+ olam pr list # every pending gate across every world
643
+ olam pr show <id> # full diff + commit log + command
644
+ olam pr approve <id> --reason "lgtm" # let gh pr create proceed
645
+ olam pr reject <id> --reason "leaks" # hook exits 2, Claude sees a tool error
646
+ \`\`\`
647
+
648
+ Under the hood: a PreToolUse hook (\`/opt/olam/scripts/pr-gate-hook.mjs\`)
649
+ installed via project-level \`.claude/settings.json\` intercepts
650
+ \`gh pr create\` calls, POSTs to \`http://127.0.0.1:8080/api/pr-gate\` in
651
+ the world, and polls \`/verdict\` until a decision lands. The control
652
+ plane persists each gate to \`/workspace/.olam/pr-gates/{id}.json\`.
653
+ Codex adversarial review is the planned second decision source — when
654
+ it agrees, the gate auto-approves; when it pushes back, a human
655
+ deconflicts via \`olam pr approve/reject\` or the dashboard.
656
+
657
+ MCP parity (\`olam_pr_{list,show,approve,reject}\`) means the same
658
+ decisions can come from an agent instead of a human.
659
+
660
+ ---
661
+
662
+ ## Philosophy
663
+
664
+ 1. **The thought is the artifact.** Code is a side effect. If you can
665
+ see the reasoning — every tool call, every review verdict, every
666
+ audit result — you can debug decisions, not just bugs.
667
+
668
+ 2. **Worlds are disposable.** Create them freely. Destroy them without
669
+ remorse. The vault survives. The completion ladder's evidence
670
+ survives. Ephemeral container state does not.
671
+
672
+ 3. **Isolation is the default.** One sandbox per world. No shared
673
+ filesystem, no shared tmux, no shared port. Parallel agents work
674
+ without stepping on each other. Blast radius is always one world.
675
+
676
+ 4. **The agent manages infrastructure.** You don't learn Worker
677
+ routes, DO names, or tmux send-keys. You say "create a world for
678
+ X"; Claude does the rest.
679
+
680
+ 5. **"Done" is an explicit ladder, not an inference.** Olam encodes
681
+ completion as a monotone state machine: \`draft → recommendations →
682
+ adversarial_review → audit_passed → pr_eligible → pr_opened\`. Each
683
+ gate has a named author and captured evidence. The dashboard reads
684
+ state — it never guesses. A PR opens only when every prior gate
685
+ has filed its event.
686
+
687
+ 6. **Policy is deploy-owned, not code-baked.** \`OLAM_CLAUDE_PERMISSION_MODE\`
688
+ decides bypass vs accept-edits vs strict at the environment level;
689
+ isolation reduces blast radius but doesn't justify a hard-coded
690
+ default. Auditable, toggleable, rotatable — all without a code
691
+ change.
692
+
693
+ 7. **Everything composes.** Olam is a Claude Code plugin + a CF
694
+ Worker, not a standalone product. It augments your existing
695
+ workflow rather than replacing it.
696
+
697
+ ---
698
+
699
+ ## Status
700
+
701
+ - **\`olam setup\` is the canonical on-ramp.** Substrate-aware wizard;
702
+ default kubernetes/k3d, \`--substrate=docker\` for the lighter compose
703
+ path. It delegates to \`olam bootstrap\` on the docker path, which
704
+ pulls all three GHCR images by digest in parallel, verifies
705
+ protocol-version overlap, and drives services + auth-login to a
706
+ working stack. Exit codes \`3\` (pull failed) and \`4\` (protocol
707
+ mismatch) are explicit.
708
+ - **Self-upgrade pipeline**: pull-by-digest is the default;
709
+ \`--from-source\` is gated behind \`OLAM_DEV=1\` + monorepo. Atomic
710
+ 6-tag swap with \`--rollback\`, \`--force\`, \`--no-cache\`, and
711
+ \`--history [-n N] [--json]\` flags. JSONL audit at
712
+ \`~/.olam/upgrade.log\`.
713
+ - **GHCR release pipeline**: \`release.yml\` publishes via native
714
+ arm64 + amd64 matrix runners using \`build-push-action@v6\` and
715
+ per-arch registry cache (\`<image>-cache:{amd64,arm64}\`). Wall-clock
716
+ collapsed from 8–12 m to 3–5 m.
717
+ - **CI watchdog (Phase C)**: wake-and-dispatch with PR-identity
718
+ validation, retry budget gated on real dispatches (\`wakes\`), and
719
+ API-side log-tail fetch from
720
+ \`api.github.com/.../actions/runs/{run_id}/logs\`. Six audit findings
721
+ closed in PR #292. Canonical sequence diagram lives in
722
+ [\`docs/design/ci-watchdog.md\`](./docs/design/ci-watchdog.md).
723
+ - **CF platform**: Sandbox + DO-owned state + completion ladder
724
+ shipped. Auto-resume covers container evictions.
725
+ - **Local auth-service parity**: shipped. Same OAuth flow as CF,
726
+ tokens in docker volume, shared-secret-gated, atomic in-world
727
+ writes, 6 h refresh.
728
+ - **PR gate**: in-world hook + control-plane endpoints + \`olam pr\`
729
+ CLI + MCP tools all live; Codex verdict as a parallel decision
730
+ source is the next follow-up.
731
+ - **ReUI design system**: ReUI is the canonical primitive source
732
+ (ADR-013). The active SPA \`packages/plan-chat-spa/\` registers the
733
+ \`@reui\` registry in its \`components.json\` and wraps primitives via
734
+ thin shims in \`src/components/ui/\`. Token/design-system drift is
735
+ guarded at the repo root by \`npm run audit:tokens\`. (The earlier
736
+ Phase-0b-i ReUI seeding in \`packages/control-plane/app/\` is sunset
737
+ along with that package.)
738
+
739
+ ---
740
+
741
+ ## Read more
742
+
743
+ - [\`docs/architecture/\`](./docs/architecture/README.md) — the 9-part
744
+ deep dive on substrates, world lifecycle, vault, completion ladder,
745
+ PR gate, and parity invariants.
746
+ - [\`docs/design/ci-watchdog.md\`](./docs/design/ci-watchdog.md) — the
747
+ canonical CI watchdog design with locked invariants, threat model,
748
+ and the wake-and-dispatch sequence diagram.
749
+ - [\`assets/landing-page/wiki/\`](./assets/landing-page/wiki/) —
750
+ public-facing usage docs (\`index.md\`, \`setup.md\`, \`usage.md\`)
751
+ rendered client-side by \`wiki.html\`. The README defers detailed
752
+ setup walkthroughs there.
753
+ - [\`docs/CF_WORLDS_SPEC.md\`](./docs/CF_WORLDS_SPEC.md) — canonical
754
+ contract between CF Worker and local docker substrates.
755
+ - [\`CLAUDE.md\`](./CLAUDE.md) — engineering rules in force across the
756
+ repo: outbound Anthropic calls via \`withCredential\`, credential
757
+ vault smoke gate, and the PR-description bar (Mermaid eval block +
758
+ \`validate-pr-body.mjs\` CI check).
759
+
760
+ ---
761
+
762
+ ## License
763
+
764
+ [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) — free
765
+ to use and adapt, not for commercial use. Commercial licenses
766
+ available — contact ernest.codes@gmail.com. Or, if you want to use it
767
+ commercially, just get Claude to understand the philosophies and
768
+ recreate the project from scratch.
769
+
770
+ ---
771
+
772
+ ## Onboarding (first-run, install, getting started)
773
+
774
+ Source: \`docs/ONBOARDING.md\`
775
+
776
+ # Olam Onboarding Guide
777
+
778
+ **Goal:** Go from zero to "I can see the team's worlds" in under 15 minutes.
779
+
780
+ ---
781
+
782
+ ## Prerequisites
783
+
784
+ - **Docker daemon** running (Docker Desktop, or colima on macOS)
785
+ - **Node.js ≥ 20** (\`node --version\`)
786
+ - **Claude Code** (\`claude --version\`) — authenticated via \`claude auth login\`
787
+ - **Git** with SSH key configured for your repos
788
+
789
+ ---
790
+
791
+ ## 1. Install the CLI and bring up the stack (3 minutes)
792
+
793
+ No source checkout required — the CLI publishes to npm:
794
+
795
+ \`\`\`bash
796
+ curl -fsSL https://olam.bar.dev/install | sh # installs @pleri/olam-cli on PATH
797
+ olam setup # k3d cluster + full peripheral stack
798
+ \`\`\`
799
+
800
+ \`olam setup\` is idempotent and substrate-aware: the default brings up a
801
+ local k3d cluster (\`olam-dev\`) running host-cp, auth-service,
802
+ mcp-auth-service, kg-service, and memory-service. Pass
803
+ \`--substrate=docker\` for the lighter docker-compose path (3 containers,
804
+ no cluster). Full guide:
805
+ [\`docs/onboarding/k3s-mode-setup.md\`](onboarding/k3s-mode-setup.md).
806
+
807
+ ## 2. Register the MCP server (1 minute)
808
+
809
+ \`\`\`bash
810
+ olam mcp install # default --scope=user
811
+ olam mcp install --scope=project # writes to the project's .mcp.json
812
+ \`\`\`
813
+
814
+ This wires Olam's MCP server into Claude Code so the agent can manage
815
+ worlds directly. Core tools: \`olam_create\`, \`olam_dispatch\`,
816
+ \`olam_observe\`, \`olam_destroy\`, \`olam_list\`, \`olam_status\`,
817
+ \`olam_enter\`, \`olam_crystallize\`, \`olam_pr_*\`. Restart Claude Code and
818
+ verify with \`claude mcp list\` (look for \`olam\`).
819
+
820
+ ## 3. Configure your repos (2 minutes)
821
+
822
+ Point Olam at the repos a world should clone. Use the interactive
823
+ wizard:
824
+
825
+ \`\`\`bash
826
+ olam init # interactive: writes .olam/config.yaml
827
+ \`\`\`
828
+
829
+ A workspace config declares the repos, services, compute tier, and cost
830
+ caps for worlds spawned from it. The \`compute.default\` field selects the
831
+ world-runner tier (\`docker\` | \`cloudflare\` | \`cloudflare-isolate\`). See
832
+ [\`docs/architecture/config-spec.md\`](architecture/config-spec.md) for
833
+ the full schema.
834
+
835
+ ## 4. Create your first world (2 minutes)
836
+
837
+ In Claude Code, say:
838
+
839
+ \`\`\`
840
+ Create a world for fixing the login bug in my-project
841
+ \`\`\`
842
+
843
+ Claude will:
844
+ 1. Create a Docker container (or CF Sandbox) with your repo cloned
845
+ 2. Set up git worktrees for isolation
846
+ 3. Boot the in-world Claude session and auto-dispatch the task
847
+ 4. Return the Host CP dashboard URL (\`http://127.0.0.1:19000\`)
848
+
849
+ ## 5. Dispatch a task (1 minute)
850
+
851
+ \`\`\`
852
+ Dispatch to the world: investigate and fix the session timeout issue
853
+ \`\`\`
854
+
855
+ Claude Code runs autonomously inside the world. Every tool call, every decision, every exploration is captured as a thought node.
856
+
857
+ ## 6. Watch it work (ongoing)
858
+
859
+ **Dashboard:** Open the Host CP URL from step 4. You'll see:
860
+ - the **seed of thought** pinned at the top (the immutable task)
861
+ - a **phase progress strip** (\`created → syncing → cloning → configuring → warming → ready → task_running\`)
862
+ - the **completion ladder** — \`draft → recommendations → adversarial_review → audit_passed → pr_eligible → pr_opened\`, each step lit when its named actor files the event
863
+ - a live **trace / events** stream of the agent's reasoning
864
+
865
+ **Terminal:** Open the full-screen ttyd terminal to watch Claude's live session.
866
+
867
+ **Observe:** In Claude Code, say:
868
+ \`\`\`
869
+ What is the world thinking right now?
870
+ \`\`\`
871
+
872
+ ## 7. Clean up
873
+
874
+ \`\`\`
875
+ Crystallize and destroy the world
876
+ \`\`\`
877
+
878
+ This:
879
+ 1. Runs the intelligence pipeline (generates SessionDigest, ADRs)
880
+ 2. Persists the thought graph
881
+ 3. Destroys the container and worktrees
882
+ 4. The code lives in git branches; the thinking lives in the artifacts
883
+
884
+ ---
885
+
886
+ ## CLI Alternative
887
+
888
+ If you prefer the terminal over Claude Code (the CLI is on your PATH
889
+ after \`olam setup\`):
890
+
891
+ \`\`\`bash
892
+ olam create --name login-fix --repos my-project --task "Fix session timeout"
893
+ olam dispatch login-fix "investigate and fix the session timeout"
894
+ olam observe login-fix # placeholder; for now attach via the world terminal
895
+ olam status login-fix
896
+ olam crystallize login-fix # requires PLERI; otherwise no-op (exit 2)
897
+ olam destroy login-fix # accepts the world ID or name
898
+ \`\`\`
899
+
900
+ ## Auth (managing Claude accounts)
901
+
902
+ The auth-service comes up as part of \`olam setup\`. To add or inspect
903
+ accounts:
904
+
905
+ \`\`\`bash
906
+ olam auth login # one-time PKCE; opens browser, paste code back
907
+ olam auth status # container state + valid accounts
908
+ olam auth list # list credentials (add --json for machine-readable)
909
+ \`\`\`
910
+
911
+ The auth-service runs inside the stack (\`:9999\` on the docker-compose
912
+ substrate, or as the \`olam-auth-service\` pod on k3d) and serves fresh
913
+ tokens to every world over a shared-secret-authenticated API. The
914
+ refresh token never leaves the service.
915
+
916
+ ---
917
+
918
+ ## Key Concepts
919
+
920
+ | Concept | What it means |
921
+ |---------|--------------|
922
+ | **World** | Isolated Docker environment for one task. Has its own git branch, services, and Claude session. |
923
+ | **Thought graph** | DAG of every decision, exploration, and action during a session. The primary artifact. |
924
+ | **Dispatch** | Sending a prompt to a world's Claude session. Context is preserved between dispatches. |
925
+ | **Crystallize** | Persisting the thought graph. Happens automatically on session end, or manually. |
926
+ | **Lane** | A parallel work track within a world. Multiple lanes can work on different aspects simultaneously. |
927
+
928
+ ## Troubleshooting
929
+
930
+ | Problem | Fix |
931
+ |---------|-----|
932
+ | "Docker not running" | Start Docker Desktop |
933
+ | "No Claude credentials" | Run \`claude auth login\` on the host |
934
+ | Dashboard shows empty | Wait for the first dispatch to generate thoughts |
935
+ | "Port already in use" | Another world is running. Use \`olam list\` to check |
936
+ | Session seems stuck | Use \`olam enter <world>\` to open the terminal and check |
937
+
938
+ ## Architecture
939
+
940
+ \`\`\`
941
+ You (Claude Code) → MCP Server → World Manager → Docker / CF Sandbox
942
+ ├── Claude Code (tmux)
943
+ ├── in-world control plane (:8080)
944
+ ├── Host CP dashboard (plan-chat-spa, :19000)
945
+ └── world.db (thoughts + artifacts)
946
+ \`\`\`
947
+
948
+ For detailed architecture, see [docs/ARCHITECTURE.md](ARCHITECTURE.md).
949
+
950
+ ---
951
+
952
+ ## Setup — fresh machine
953
+
954
+ Source: \`docs/onboarding/fresh-machine-setup.md\`
955
+
956
+ # Fresh machine setup — docker compose mode
957
+
958
+ > **Audience**: a new operator setting up olam on a fresh Mac or Linux box using
959
+ > the **docker compose substrate** (3 host containers, no Kubernetes cluster).
960
+ >
961
+ > **Looking for the full k3d setup?** That is the default — see
962
+ > [\`k3s-mode-setup.md\`](./k3s-mode-setup.md).
963
+ >
964
+ > At the end of this guide you have:
965
+ >
966
+ > - olam CLI installed globally + verified
967
+ > - The 3 olam runtime containers (auth, kg-service, mcp-auth) pulled and running
968
+ > - Claude Code auth configured against your operator credential
969
+ > - Skills + agents from atlas-toolbox deployed under \`~/.claude/\` as symlinks
970
+ > - olam-meta hook blocks (memory-recall + memory-classify) sentinel-bounded inside \`~/.claude/settings.json\`
971
+ > - Memory-bridge running on \`127.0.0.1:3111\`, livez probing green
972
+ > - kg-service classifier hook installed (optional but recommended)
973
+
974
+ The recipe is **idempotent** — re-running a step is safe.
975
+
976
+ ---
977
+
978
+ ## 0. Prereqs
979
+
980
+ | Requirement | Why | How to install |
981
+ |---|---|---|
982
+ | **macOS 14+ / Linux** | Olam targets these — Windows native is not supported | n/a |
983
+ | **Node.js ≥ 20** | Runtime for the CLI + skill-source MCP servers | \`brew install node\` (mac) or \`nvm install 20\` |
984
+ | **Docker** (daemon running) | Hosts the 3 olam runtime containers | Docker Desktop (macOS) or \`sudo apt install docker.io\` (Linux); colima works too |
985
+ | **git** (with SSH key configured for your repos) | Cloning atlas-toolbox + per-world workspace mirrors | \`brew install git\` + \`ssh-keygen\` |
986
+ | **Claude Code subscription** (operator account) | What the local \`claude\` CLI consumes for HTTPS-SDK + agent-SDK calls | \`npm install -g @anthropic-ai/claude-code\` |
987
+ | **Tailscale** (optional) | Only if you'll be SSH-ing to other operators' machines | https://tailscale.com/download |
988
+
989
+ Sanity-check before continuing:
990
+
991
+ \`\`\`bash
992
+ node --version # → v20.x or higher
993
+ docker info # → Docker daemon details (no error)
994
+ git --version # → 2.x
995
+ claude --version # → ≥ 2026-04 build
996
+ \`\`\`
997
+
998
+ If any of those fail, fix that first.
999
+
1000
+ ---
1001
+
1002
+ ## 1. Install the olam CLI
1003
+
1004
+ \`\`\`bash
1005
+ curl -fsSL https://olam.bar.dev/install | sh
1006
+ olam --version # → 0.1.166 (or newer)
1007
+ \`\`\`
1008
+
1009
+ Or directly via npm:
1010
+
1011
+ \`\`\`bash
1012
+ npm install -g @pleri/olam-cli@latest
1013
+ \`\`\`
1014
+
1015
+ This brings down \`olam\`, \`olam-mcp\`, and a thin bundle of node modules. No Docker pulls happen yet.
1016
+
1017
+ ---
1018
+
1019
+ ## 2. Bootstrap the olam stack (docker compose mode)
1020
+
1021
+ \`\`\`bash
1022
+ olam setup --substrate=docker
1023
+ \`\`\`
1024
+
1025
+ This is the heaviest step (~3-8 minutes on first run). It:
1026
+
1027
+ 1. Pulls 3 container images from \`ghcr.io/pleri/\`:
1028
+ - \`olam-auth\` — the Claude-auth proxy your CLI shells against (\`withCredential\` gateway).
1029
+ - \`olam-mcp-auth\` — host-side MCP wrapper for Claude auth.
1030
+ - \`olam-kg-service\` — knowledge-graph + classifier sidecar (port \`127.0.0.1:9997\`).
1031
+ 2. Initialises \`~/.olam/config.json\` (schemaVersion 1, \`host.substrate: 'compose'\`).
1032
+ 3. Starts the 3 containers via \`docker compose\`.
1033
+ 4. Prompts you to authenticate Claude Code (\`olam auth login\` runs under the hood).
1034
+
1035
+ When it finishes:
1036
+
1037
+ \`\`\`bash
1038
+ olam services status # → 3 containers RUNNING
1039
+ olam auth status # → at least 1 active credential
1040
+ \`\`\`
1041
+
1042
+ If \`olam auth status\` shows no credentials, run \`olam auth login\` and follow the prompts.
1043
+
1044
+ ---
1045
+
1046
+ ## 3. Initialise olam in your working directory
1047
+
1048
+ Pick the directory where you keep your day-to-day code (e.g. \`~/Projects/my-org/my-repo\`), then:
1049
+
1050
+ \`\`\`bash
1051
+ cd ~/Projects/my-org/my-repo
1052
+ olam init
1053
+ \`\`\`
1054
+
1055
+ This writes a per-project \`.olam/config.yaml\` so olam knows the workspace boundary.
1056
+
1057
+ Optional: skip the Pleri (analytics) prompt with \`olam init --skip-pleri\`.
1058
+
1059
+ ---
1060
+
1061
+ ## 4. Register atlas-toolbox as your skill source
1062
+
1063
+ Atlas-toolbox is the canonical ECC shared-skills repo. You need its \`.git\` URL and read access — coordinate with the operator who set it up if you don't already have SSH access to \`git@github.com:atlas-builders/atlas-toolbox.git\`.
1064
+
1065
+ \`\`\`bash
1066
+ olam skills source add \\
1067
+ --name atlas-toolbox \\
1068
+ --git-url git@github.com:atlas-builders/atlas-toolbox.git \\
1069
+ --branch master \\
1070
+ --trust \\
1071
+ --no-sync-now \\
1072
+ --no-install-hook
1073
+ \`\`\`
1074
+
1075
+ What each flag does:
1076
+
1077
+ - \`--trust\` — acknowledges that registering this source grants olam permission to symlink content into \`~/.claude/\`. Required because skill sources are a T6 capability class (they ship executable hook scripts).
1078
+ - \`--no-sync-now\` — defers the first sync until after memory-bridge is up (step 5), so the very first sync injects both halves (skill content + olam-meta blocks) in one transaction.
1079
+ - \`--no-install-hook\` — skips the legacy SessionStart hook (\`olam skills sync\` runs explicitly in step 6 instead).
1080
+
1081
+ Verify:
1082
+
1083
+ \`\`\`bash
1084
+ olam skills source list
1085
+ # → 1 skill source(s)
1086
+ # [1] <id> atlas-toolbox master (unpulled) <iso-date> git@github.com:...
1087
+ \`\`\`
1088
+
1089
+ ---
1090
+
1091
+ ## 5. Start the memory-bridge
1092
+
1093
+ The memory-bridge is a host process that serves \`127.0.0.1:3111/agentmemory/livez\`. When it's running, \`olam skills sync\` will inject the olam-meta-memory-recall + olam-meta-memory-classify hook blocks into \`~/.claude/settings.json\`. When it's NOT running, the strip half of the auto-migration still fires but no olam-meta blocks land — meaning the operator gets no recall/classify behavior.
1094
+
1095
+ \`\`\`bash
1096
+ olam memory secret # → shows the bearer at ~/.olam/memory-secret (auto-generated on first run)
1097
+ olam memory start # → starts the host process; polls livez until ready
1098
+ olam memory status # → pid + livez + secret-set check
1099
+ \`\`\`
1100
+
1101
+ Sanity check the live probe:
1102
+
1103
+ \`\`\`bash
1104
+ curl -sS http://127.0.0.1:3111/agentmemory/livez
1105
+ # → {"service":"agentmemory","status":"ok"}
1106
+ \`\`\`
1107
+
1108
+ Optional: register memory as an MCP server so Claude Code can call it directly:
1109
+
1110
+ \`\`\`bash
1111
+ olam memory install --scope user
1112
+ \`\`\`
1113
+
1114
+ ---
1115
+
1116
+ ## 6. Run the first sync
1117
+
1118
+ \`\`\`bash
1119
+ olam skills sync
1120
+ \`\`\`
1121
+
1122
+ You should see output like:
1123
+
1124
+ \`\`\`
1125
+ sync summary
1126
+ sources: 1
1127
+ artifacts: ~120
1128
+ hook files: 3-5
1129
+ permission files:1-2
1130
+ symlinks made: ~250
1131
+ hooks added: 3
1132
+ permissions: ~60
1133
+ settings backup: /Users/<you>/.olam/state/settings-backups/settings-<ISO>.json
1134
+ meta-hooks: mode=auto · memory=up
1135
+ + injected: memory-recall, memory-classify
1136
+
1137
+ atlas-toolbox 120 artifacts · engineering, product, growth, design (all categories)
1138
+ ok synced 1 source(s), 120 artifact(s)
1139
+ \`\`\`
1140
+
1141
+ The load-bearing lines:
1142
+
1143
+ - \`meta-hooks: mode=auto · memory=up\` — memory-bridge probe succeeded.
1144
+ - \`+ injected: memory-recall, memory-classify\` — both olam-meta blocks are now in \`~/.claude/settings.json\`.
1145
+
1146
+ If you ran this on a machine that had been using the old atlas-toolbox \`sync.sh\` (and therefore had bare atlas-shipped agentmemory hook entries in \`~/.claude/settings.json\`), you'll also see:
1147
+
1148
+ \`\`\`
1149
+ ~ auto-migrated: stripped 2 atlas-toolbox-shipped agentmemory hook entry(ies); replaced by olam-injected blocks
1150
+ \`\`\`
1151
+
1152
+ That's the Phase C C3 auto-migration. The pre-strip state is snapshotted at \`~/.olam/state/migration-snapshots/meta-hooks-<ISO>-<pid>-<rand>.json\`. Reverse via \`olam skills migrate-hooks-back\` if you ever need to.
1153
+
1154
+ ---
1155
+
1156
+ ## 7. Install the kg-service grep classifier hook (recommended)
1157
+
1158
+ Routes \`grep\` / \`rg\` / \`find\` invocations through the kg-service classifier so search hits the knowledge graph when the question is graph-shaped.
1159
+
1160
+ \`\`\`bash
1161
+ olam kg install-hook --scope user # writes the sentinel-bound hook to ~/.claude/settings.json
1162
+ olam kg doctor # validates the hook + the kg-service container
1163
+ \`\`\`
1164
+
1165
+ Open a new Claude Code session to pick up the hook (existing sessions snapshot settings.json at start).
1166
+
1167
+ ---
1168
+
1169
+ ## 8. Verify the end state
1170
+
1171
+ \`\`\`bash
1172
+ # (a) Skill sources registered
1173
+ olam skills source list
1174
+
1175
+ # (b) Skill artifacts deployed (symlinks into ~/.olam/state/skill-sources/<id>/...)
1176
+ ls -la ~/.claude/skills/ | head -10
1177
+ ls -la ~/.claude/agents/ | head -10
1178
+
1179
+ # (c) olam-meta sentinel-bound hook blocks in settings.json
1180
+ jq -r '.hooks.PreToolUse[]?.hooks[]?.command, .hooks.PostToolUse[]?.hooks[]?.command' \\
1181
+ ~/.claude/settings.json | grep -E "olam-meta-memory|kg-service-v2"
1182
+ # expected output (3 lines):
1183
+ # OLAM_META_SENTINEL=olam-meta-memory-recall-v1; ... agentmemory-recall-trigger.mjs
1184
+ # OLAM_META_SENTINEL=olam-meta-memory-classify-v1; ... agentmemory-classify-queue.mjs
1185
+ # KG_SENTINEL=kg-service-v2-classifier-hook; ... 127.0.0.1:9997/classify
1186
+
1187
+ # (d) Memory-bridge live
1188
+ curl -sS http://127.0.0.1:3111/agentmemory/livez
1189
+ # → {"service":"agentmemory","status":"ok"}
1190
+
1191
+ # (e) Trust-audit log entries (one per skill-source-add + per meta-hook-stripped)
1192
+ tail -3 ~/.olam/state/skill-sources-audit.log | jq -c '{timestamp, action, sourceId}'
1193
+ \`\`\`
1194
+
1195
+ All five should return non-empty / OK output.
1196
+
1197
+ ---
1198
+
1199
+ ## 9. (Optional) Open a Claude Code session and test recall
1200
+
1201
+ \`\`\`bash
1202
+ cd ~/Projects/my-org/my-repo
1203
+ claude # opens a Claude Code session
1204
+ # Inside the session, run any bash/edit operation — the PreToolUse hook will
1205
+ # query the memory-bridge and inject \`additionalContext\` with recalled memories.
1206
+ # Look for \`[recall]\` lines in the Claude Code output.
1207
+ \`\`\`
1208
+
1209
+ If the recall hook doesn't fire, run \`olam memory status\` to confirm the bridge is up + reachable.
1210
+
1211
+ ---
1212
+
1213
+ ## Troubleshooting
1214
+
1215
+ | Symptom | Likely cause | Fix |
1216
+ |---|---|---|
1217
+ | \`olam skills sync\` errors with \`GlobalConfigReadError\` | \`~/.olam/config.json\` schemaVersion mismatch | \`olam config validate\` then \`olam bootstrap\` again |
1218
+ | \`meta-hooks: mode=auto · memory=down\` in sync output | memory-bridge not running OR probe times out | \`olam memory start\` (idempotent) + curl the livez URL |
1219
+ | Skill artifacts show as broken symlinks | atlas-toolbox clone moved or deleted | \`olam skills source pull <id>\` to re-clone |
1220
+ | \`~/.claude/settings.json\` keeps reverting | Multiple Claude Code sessions writing simultaneously | A4 lock should serialize; if it doesn't, check \`~/.olam/state/.settings-json.lock\` for a stale holder |
1221
+ | \`olam --version\` shows old version after \`npm install -g\` | nvm shim conflict OR cached PATH | \`which -a olam\` to locate; \`npm uninstall -g @pleri/olam-cli && npm install -g @pleri/olam-cli@latest\` |
1222
+
1223
+ ---
1224
+
1225
+ ## What's NOT in this doc
1226
+
1227
+ - Setting up Cloudflare-substrate worlds (separate doc: \`docs/architecture/cf-worlds-spec.md\`).
1228
+ - PLERI thought-graph integration (separate setup; skip-pleri is fine for most operators).
1229
+ - Per-project skill overrides (advanced; see Phase B B2 + \`docs/architecture/skill-source-contract.md\`).
1230
+ - Cutting an olam release (developer flow, not operator flow; see \`~/.claude/skills/olam-cut-release/SKILL.md\`).
1231
+
1232
+ ---
1233
+
1234
+ ## Reverting
1235
+
1236
+ \`\`\`bash
1237
+ # Strip olam-injected meta-hook blocks but keep skill symlinks
1238
+ olam skills migrate-hooks-back
1239
+
1240
+ # Remove the atlas-toolbox skill source entirely (deletes its clone + symlinks)
1241
+ olam skills source remove <id>
1242
+
1243
+ # Stop + remove all olam-managed Docker containers
1244
+ olam services down
1245
+
1246
+ # Uninstall the CLI
1247
+ npm uninstall -g @pleri/olam-cli
1248
+ \`\`\`
1249
+
1250
+ \`~/.olam/state/migration-snapshots/\` keeps the pre-injection settings.json snapshots indefinitely; nothing prunes them automatically.
1251
+
1252
+ ---
1253
+
1254
+ ## Architecture — the problem olam solves
1255
+
1256
+ Source: \`docs/architecture/01-problem.md\`
1257
+
1258
+ # 1 · The problem
1259
+
1260
+ ## What "run an agent" usually means
1261
+
1262
+ Today, running a coding agent means one of these:
1263
+
1264
+ 1. **Local CLI** — you install the agent on your laptop. It reads your
1265
+ files, writes your files, runs commands with your shell. Fast but:
1266
+ - Secrets, SSH keys, browser cookies — all in blast radius.
1267
+ - Agent state mixes with your state: shell history, node_modules,
1268
+ uncommitted WIP.
1269
+ - One agent at a time. No parallelism without manual isolation.
1270
+ - No audit trail beyond terminal scrollback.
1271
+
1272
+ 2. **Ephemeral cloud runner** (Actions, remote sandbox SaaS) — better
1273
+ isolation but:
1274
+ - Boot latency measured in minutes.
1275
+ - Auth per-run (paste a token each time).
1276
+ - No persistent identity: every run is a stranger.
1277
+ - The agent exits when the job exits. No interactive session.
1278
+
1279
+ 3. **Long-lived VM** — stable identity but:
1280
+ - Drift: the VM accumulates state nobody audits.
1281
+ - Hard to reset cleanly; hard to parallelise.
1282
+ - Still one host. Blast radius = the VM.
1283
+
1284
+ ## What we actually want
1285
+
1286
+ \`\`\`mermaid
1287
+ flowchart LR
1288
+ U[User] -->|one sentence| O{Olam}
1289
+ O -->|provisions| W[Isolated world]
1290
+ W -->|runs| A[Agent]
1291
+ A -->|work + trail| R[Reviewed PR]
1292
+ R -->|back to| U
1293
+
1294
+ style W fill:#1e1e24,stroke:#4f6aff,color:#e4e4e7
1295
+ style R fill:#0c0c0f,stroke:#2eaa6f,color:#e4e4e7
1296
+ \`\`\`
1297
+
1298
+ Properties the user doesn't want to negotiate:
1299
+
1300
+ - **Throw-away**: the world should be destroyable without remorse. No
1301
+ state worth keeping should live there after the run.
1302
+ - **Pre-loaded**: repos cloned, credentials injected, MCP servers
1303
+ pre-baked. Zero setup per-task.
1304
+ - **Resumable**: if the platform evicts the sandbox, I come back to it
1305
+ still knowing my context, not a login screen.
1306
+ - **Honest**: the system tells me what the agent did — every tool call,
1307
+ every edit, every review verdict — without me re-running anything.
1308
+ - **Gated**: "done" means *actually* done. Not "the agent stopped
1309
+ typing."
1310
+
1311
+ ## Why existing tools don't combine into this
1312
+
1313
+ Each tool gets one or two of those properties. None ships the full
1314
+ stack:
1315
+
1316
+ | Property | Local CLI | Actions | SaaS sandbox | Long VM | **Olam** |
1317
+ |--------------------|:---------:|:-------:|:------------:|:-------:|:--------:|
1318
+ | Isolation | ✗ | ✓ | ✓ | ✗ | ✓ |
1319
+ | Sub-second boot | ✓ | ✗ | ✗ | ✓ | ✓ |
1320
+ | Persistent identity| ✓ | ✗ | ✗ | ✓ | ✓ |
1321
+ | Resumable | ✓ | ✗ | ✗ | ✓ | ✓ |
1322
+ | Audit trail | ✗ | ~ | ~ | ✗ | ✓ |
1323
+ | Completion gates | ✗ | ✗ | ✗ | ✗ | ✓ |
1324
+
1325
+ The last row — explicit completion gates — is the one no one ships.
1326
+ That's where the real product lives.
1327
+
1328
+ ## Where olam sits
1329
+
1330
+ Olam ships in two flavors. Both expose the same world abstraction; the
1331
+ difference is where the workshop runs.
1332
+
1333
+ \`\`\`mermaid
1334
+ flowchart TB
1335
+ subgraph yourmachine ["Your machine"]
1336
+ CLI["olam CLI / MCP"]
1337
+ BR["Browser"]
1338
+ end
1339
+
1340
+ subgraph local ["Local flavor — host-cp"]
1341
+ HC["host-cp orchestrator (:9090)"]
1342
+ AS["auth-service (:9999)"]
1343
+ DB["devbox containers (per world)"]
1344
+ end
1345
+
1346
+ subgraph cloud ["Cloud flavor — Cloudflare edge"]
1347
+ W["Worker / Durable Object"]
1348
+ S["Sandbox Container"]
1349
+ end
1350
+
1351
+ subgraph dist ["Distribution"]
1352
+ NPM["npm — @ernerds/olam"]
1353
+ GHCR["GHCR — 3 multi-arch images"]
1354
+ end
1355
+
1356
+ subgraph providers ["Providers"]
1357
+ CA["Anthropic / Claude Code"]
1358
+ GH["GitHub"]
1359
+ LN["Linear"]
1360
+ end
1361
+
1362
+ NPM -->|"npm install -g"| CLI
1363
+ CLI -->|"olam bootstrap, pull by digest"| GHCR
1364
+ GHCR -->|"host-cp, auth, devbox"| HC
1365
+ GHCR --> AS
1366
+ GHCR --> DB
1367
+
1368
+ CLI -->|"local"| HC
1369
+ BR -->|"local"| HC
1370
+ HC <-->|"docker exec / ttyd"| DB
1371
+ AS -->|"PKCE + token mint"| CA
1372
+ DB -->|"withCredential"| AS
1373
+
1374
+ CLI -.->|"cloud"| W
1375
+ BR -.->|"cloud"| W
1376
+ W <-->|"RPC"| S
1377
+ W -->|"OAuth"| CA
1378
+
1379
+ DB -->|"gh"| GH
1380
+ DB --> LN
1381
+ S -->|"gh"| GH
1382
+ S --> LN
1383
+
1384
+ style HC fill:#16161a,stroke:#4f6aff,color:#e4e4e7
1385
+ style AS fill:#16161a,stroke:#eab308,color:#e4e4e7
1386
+ style DB fill:#16161a,stroke:#c084fc,color:#e4e4e7
1387
+ style W fill:#16161a,stroke:#4f6aff,color:#e4e4e7
1388
+ style S fill:#16161a,stroke:#c084fc,color:#e4e4e7
1389
+ \`\`\`
1390
+
1391
+ **Local flavor** (default for self-hosted operators).
1392
+ \`npm install -g @ernerds/olam\` lands a CLI; \`olam bootstrap\` pulls three
1393
+ images by digest from GHCR — \`olam-host-cp\`, \`olam-auth\`, \`olam-devbox\` —
1394
+ runs the protocol-version handshake, starts host-cp + auth-service, and
1395
+ walks the operator through Anthropic PKCE. Worlds are docker containers
1396
+ on the operator's own machine.
1397
+
1398
+ **Cloud flavor**. The Cloudflare Worker plus a \`@cloudflare/sandbox\`
1399
+ Durable Object plays the role host-cp plays locally. The container is
1400
+ Cloudflare-managed.
1401
+
1402
+ In both flavors a *gateway* mediates everything that can't safely live
1403
+ inside a world: OAuth token exchange, the credential vault, world
1404
+ lifecycle, and the completion ladder. The container is the *workshop*;
1405
+ the gateway (host-cp or Worker) is the *supervisor*.
1406
+
1407
+ Next: [2 · The paradigm](./02-paradigm.md) — the three ideas that make
1408
+ this actually cohere.
1409
+
1410
+ ---
1411
+
1412
+ ## Architecture — system overview
1413
+
1414
+ Source: \`docs/architecture/03-system.md\`
1415
+
1416
+ # 3 · System architecture
1417
+
1418
+ Olam runs two substrates in parallel. Both expose the same world
1419
+ abstraction; both share the same control concepts (credential vault,
1420
+ world lifecycle, completion ladder, crystallization via Pleri Plane).
1421
+ The difference is whether the workshop runs on the operator's machine
1422
+ or on Cloudflare's edge.
1423
+
1424
+ \`\`\`mermaid
1425
+ flowchart TB
1426
+ subgraph client ["Client surface"]
1427
+ Dash["Dashboard SPA — React 19 + Vite"]
1428
+ CLI["olam CLI / MCP (npm: @ernerds/olam)"]
1429
+ end
1430
+
1431
+ subgraph local ["Local flavor — host-cp on operator machine"]
1432
+ HC["host-cp orchestrator (:9090)"]
1433
+ AS["auth-service (:9999, single-container vault)"]
1434
+ DBs["devbox container per world (Node 20 + zsh + Claude Code + Codex)"]
1435
+ end
1436
+
1437
+ subgraph cloud ["Cloud flavor — Cloudflare edge"]
1438
+ CFA["CF Access SSO (JWT or service token)"]
1439
+ W["Worker · olam (routing + OAuth + orchestrator)"]
1440
+ DO[("Durable Object · OlamSandbox (phase, seed, trace, completion)")]
1441
+ KV1[("KV · OLAM_CREDS (per-user tokens)")]
1442
+ KV2[("KV · OLAM_WORKSPACES")]
1443
+ R2[("R2 · OLAM_USER_PROFILES (skill bundles)")]
1444
+ S["Sandbox container (cloudflare/sandbox:0.8.10)"]
1445
+ end
1446
+
1447
+ subgraph providers ["Providers (shared)"]
1448
+ ANT["Anthropic OAuth + token endpoint"]
1449
+ GH["GitHub · gh CLI"]
1450
+ LN["Linear MCP"]
1451
+ PP["Pleri Plane · crystallize REST API"]
1452
+ end
1453
+
1454
+ Dash -->|"HTTPS — local"| HC
1455
+ CLI -->|"HTTPS — local"| HC
1456
+ HC <-->|"docker exec, ttyd, hooks"| DBs
1457
+ HC -->|"world lifecycle, completion ladder"| HC
1458
+ DBs -->|"withCredential — observes 429"| AS
1459
+ AS -->|"PKCE, refresh, mint per-world tokens"| ANT
1460
+
1461
+ Dash -.->|"HTTPS — cloud"| CFA
1462
+ CLI -.->|"service token — cloud"| CFA
1463
+ CFA --> W
1464
+ W <-->|"RPC"| DO
1465
+ W <--> KV1
1466
+ W <--> KV2
1467
+ W <--> R2
1468
+ W <-->|"containerFetch / wsConnect"| S
1469
+ W -->|"OAuth — edge can reach providers"| ANT
1470
+
1471
+ DBs -->|"gh"| GH
1472
+ DBs --> LN
1473
+ DBs -->|"crystallize POST"| PP
1474
+ S -->|"gh"| GH
1475
+ S --> LN
1476
+ S -->|"crystallize POST"| PP
1477
+
1478
+ style HC fill:#0c0c0f,stroke:#4f6aff,color:#e4e4e7
1479
+ style AS fill:#0c0c0f,stroke:#eab308,color:#e4e4e7
1480
+ style DBs fill:#1e1e24,stroke:#2eaa6f,color:#e4e4e7
1481
+ style W fill:#0c0c0f,stroke:#4f6aff,color:#e4e4e7
1482
+ style DO fill:#0c0c0f,stroke:#a855f7,color:#e4e4e7
1483
+ style S fill:#1e1e24,stroke:#2eaa6f,color:#e4e4e7
1484
+ style KV1 fill:#0c0c0f,stroke:#eab308,color:#e4e4e7
1485
+ style PP fill:#0c0c0f,stroke:#22d3ee,color:#e4e4e7
1486
+ \`\`\`
1487
+
1488
+ ## Shared concepts
1489
+
1490
+ Both flavors implement the same quartet:
1491
+
1492
+ - **Credential vault with hot-swap**. 429s observed at the
1493
+ \`withCredential\` boundary report cooldown back to the vault, which
1494
+ rotates to the next-eligible credential on retry. Local: auth-service.
1495
+ Cloud: \`OLAM_CREDS\` KV plus the Worker's edge OAuth path. See
1496
+ [credential-hotswap.md](./credential-hotswap.md).
1497
+ - **World lifecycle state machine**. Phases —
1498
+ \`created → syncing → cloning → configuring → auth_required|warming →
1499
+ ready → task_running\` — with legal-transition guards.
1500
+ - **Completion ladder**. Explicit ladder events validated by the same
1501
+ \`completion.ts\` state machine; the ladder is what makes "done" mean
1502
+ *actually* done.
1503
+ - **Crystallization via Pleri Plane**. Thought graphs accumulated by MCP
1504
+ tools are flushed to the Pleri Plane REST API (\`POST /crystallize\`) via
1505
+ \`PleriClient\` (\`packages/core/src/pleri/\`). A single \`pleri.token\` in
1506
+ \`.olam/config.yaml\` replaces the former direct Neon database credentials.
1507
+ Thoughts buffer locally in a per-container SQLite store (\`world.db\`) if
1508
+ Pleri is unreachable or unconfigured; a world functions fully without
1509
+ Pleri, but crystallization is unavailable until connectivity is restored.
1510
+ See [ADR-004](../decisions/004-pleri-plane-replaces-neon.md).
1511
+
1512
+ ## Local-flavor components
1513
+
1514
+ ### CLI (\`@ernerds/olam\`)
1515
+ - Single npm package; \`npm install -g @ernerds/olam\` (Node 20+).
1516
+ - \`olam bootstrap\` is the sole on-ramp: docker daemon smoke → parallel
1517
+ pull of 3 images by digest from GHCR (retry-throttle-coalesce per
1518
+ Decision 16) → protocol-version handshake (\`olam.protocol.versions\`)
1519
+ → host-cp start → auth-service start → interactive PKCE.
1520
+ - \`olam upgrade\` performs the atomic 6-tag swap: \`:olam-rollback\` saves
1521
+ current canonical (\`:latest\` for host-cp + devbox, \`:local\` for auth)
1522
+ then \`:olam-next\` advances canonical. Source build is opt-in via
1523
+ \`--from-source\`, only honoured in monorepo dev mode.
1524
+ - \`olam create\` / \`dispatch\` / \`enter\` / \`pr\` etc. talk to host-cp.
1525
+
1526
+ ### host-cp (\`olam-host-cp\` image)
1527
+ - Single container running on the operator's docker daemon, port 9090.
1528
+ - Owns world lifecycle, ttyd routing, hooks ingestion, completion
1529
+ projection — the same surface the Worker exposes in cloud mode.
1530
+ - Each world is a separately-spawned \`olam-devbox\` container; host-cp
1531
+ manages the docker lifecycle and proxies the dashboard.
1532
+
1533
+ ### auth-service (\`olam-auth\` image)
1534
+ - Single container, port 9999. Holds the Anthropic refresh token in a
1535
+ local vault file (\`OLAM_AUTH_DATA_PATH\`).
1536
+ - Mints per-world access tokens via \`withCredential\`; observes 429s and
1537
+ cools down the offending credential. CI smoke: \`npm run audit:credentials\`.
1538
+ - Default canonical tag is \`:local\` (not \`:latest\`) per
1539
+ \`AuthContainerController.DEFAULT_IMAGE\`.
1540
+
1541
+ ### devbox (\`olam-devbox\` image)
1542
+ - Per-world container. Pre-baked: Claude Code CLI, Codex CLI, Slack +
1543
+ Linear MCP servers, \`gh\`, ttyd, tmux, zsh.
1544
+ - Talks to auth-service over the host docker network for credential
1545
+ retrieval — never embeds raw tokens.
1546
+ - Crystallizes thought graphs to Pleri Plane via \`PleriClient\`; buffers
1547
+ locally in \`world.db\` (SQLite) when Pleri is unreachable.
1548
+
1549
+ ## Cloud-flavor components
1550
+
1551
+ ### Worker (\`olam\`)
1552
+ - Authority for cross-world concerns: vault lookup, OAuth refresh, PKCE
1553
+ exchange (providers block container egress IPs).
1554
+ - Path-based proxy to per-session container via \`/sandbox/:id/*\`
1555
+ (\`containerFetch\` for HTTP, \`wsConnect\` for the ttyd terminal iframe).
1556
+ - \`runSessionOrchestrator\` walks the world lifecycle in \`ctx.waitUntil\`.
1557
+ - \`POST /session/:id/completion/event\` validates ladder transitions.
1558
+
1559
+ ### Durable Object (\`OlamSandbox\`)
1560
+ - One DO per world, keyed by \`sessionId\`. Extends \`@cloudflare/sandbox\`'s
1561
+ \`Sandbox\` class so it owns both container RPC and per-world state.
1562
+ - State persisted under a single \`world\` key: phase + detail + setupLog +
1563
+ \`sessionMeta\` (seedTask, vaultEmail) + bounded traceBuffer (2000) +
1564
+ completion record.
1565
+
1566
+ ### Container (cloud)
1567
+ - Based on \`cloudflare/sandbox:0.8.10\`. Same pre-baked toolchain as
1568
+ local devbox; same \`/api/*\` surface on port 8080.
1569
+ - Same \`PleriClient\` crystallization path as local devbox; Pleri token
1570
+ injected at session setup via the Worker's vault lookup.
1571
+
1572
+ ### Vault (KV + R2)
1573
+ - \`OLAM_CREDS\` — per-user Anthropic + OpenAI tokens, scopes, expiry.
1574
+ - \`OLAM_WORKSPACES\` — repo lists + defaults.
1575
+ - \`OLAM_USER_PROFILES\` — content-addressed R2 bundle of skills the
1576
+ container untars into \`~/.claude/skills/\`.
1577
+
1578
+ ## Dashboard (shared)
1579
+
1580
+ - Same React 19 + Vite + Motion 12 SPA in both flavors.
1581
+ - Local: served by host-cp; cloud: served from inside the sandbox
1582
+ container with \`<base href="/sandbox/:id/">\` injected.
1583
+ - Every panel is a projection — \`CompletionLadder\`, \`PhaseProgress\`,
1584
+ \`TracePanel\`, \`SeedCard\`, \`SessionHealthBar\` — never an inference.
1585
+
1586
+ ## Traffic matrix (who talks to whom)
1587
+
1588
+ | From → To | Local flavor | Cloud flavor |
1589
+ |----------------------------|-------------------------------|------------------------------------|
1590
+ | Browser → gateway | HTTPS to host-cp :9090 | HTTPS to Worker via CF Access |
1591
+ | CLI / MCP → gateway | HTTPS to host-cp :9090 | service token via CF Access |
1592
+ | Gateway → world container | docker exec, ttyd, HTTP hooks | \`containerFetch\` / \`wsConnect\` |
1593
+ | Hooks → gateway | HTTP POST to host-cp | HTTP POST to container :8080 |
1594
+ | Gateway → Anthropic | auth-service PKCE | Worker edge OAuth |
1595
+ | World → Anthropic | \`withCredential\` to auth-svc | injected token from KV via Worker |
1596
+ | World → GitHub | injected \`GITHUB_TOKEN\` | injected \`GITHUB_TOKEN\` |
1597
+ | World → Pleri Plane | \`PleriClient\` HTTP POST (crystallize) | \`PleriClient\` HTTP POST (crystallize) |
1598
+
1599
+ ## Substrate event delivery
1600
+
1601
+ The two substrates handle incoming GitHub events through opposite mechanisms,
1602
+ dictated by their environments.
1603
+
1604
+ **CF Worker — push (webhook).** The Worker exposes \`POST /webhooks/github\`
1605
+ (\`packages/cloudflare-worker/src/index.ts:2648\`) as a public GitHub App
1606
+ webhook receiver. GitHub delivers events to the Worker's stable public URL;
1607
+ the endpoint is HMAC-gated. CF has no persistent background processes —
1608
+ push is the only viable model.
1609
+
1610
+ **host-cp — poll.** host-cp runs \`packages/host-cp/src/pr-merge-poller.mjs\`,
1611
+ a polling loop (default 300 s interval) that queries the GitHub API to detect
1612
+ PR merges. host-cp runs on the operator's machine — often behind NAT or a
1613
+ firewall — so GitHub cannot push to it. Polling is the only viable model.
1614
+
1615
+ The two shapes are **functionally equivalent** (both detect the same events)
1616
+ but **architecturally opposite**: CF is push-based; host-cp is pull-based.
1617
+ This asymmetry is substrate-dictated, not a design gap. See
1618
+ [ADR-012](../decisions/012-substrate-event-delivery-asymmetry.md).
1619
+
1620
+ Next: [4 · World lifecycle](./04-lifecycle.md) — the phase state machine.
1621
+
1622
+ ---
1623
+
1624
+ ## Architecture — world lifecycle
1625
+
1626
+ Source: \`docs/architecture/04-lifecycle.md\`
1627
+
1628
+ # 4 · World lifecycle
1629
+
1630
+ The **phase** state machine owns provisioning. It sits one layer below
1631
+ the completion ladder — "is the world operational?" vs "is the work
1632
+ done?".
1633
+
1634
+ ## States + transitions
1635
+
1636
+ \`\`\`mermaid
1637
+ stateDiagram-v2
1638
+ [*] --> created
1639
+ created --> syncing : orchestrator starts
1640
+ syncing --> cloning : profile loaded
1641
+ cloning --> configuring : repos cloned
1642
+ configuring --> auth_required : no vault creds
1643
+ configuring --> warming : vault creds injected
1644
+ auth_required --> warming : /auth/complete
1645
+ warming --> ready : probe ACKs
1646
+ warming --> failed : probe timeout or spawn error
1647
+ ready --> task_running : auto-dispatch
1648
+ task_running --> ready : dispatch accepted
1649
+ task_running --> destroyed : user destroys
1650
+ ready --> destroyed
1651
+ failed --> destroyed
1652
+ failed --> warming : /resume
1653
+ destroyed --> [*]
1654
+ \`\`\`
1655
+
1656
+ All transitions are validated in \`src/phase.ts:isLegalTransition\`.
1657
+ Illegal transitions throw \`IllegalPhaseTransitionError\`.
1658
+ Self-transitions on the same phase are legal — used to refresh the
1659
+ \`detail\` string during long phases like \`warming\`.
1660
+
1661
+ ## The provisioning pipeline (happy path)
1662
+
1663
+ \`\`\`mermaid
1664
+ sequenceDiagram
1665
+ autonumber
1666
+ participant User
1667
+ participant Worker
1668
+ participant DO as Durable Object
1669
+ participant Sandbox as "Sandbox container"
1670
+ participant Vault as OLAM_CREDS KV
1671
+ participant Claude
1672
+
1673
+ User->>Worker: POST /session/start { task, workspace, userEmail }
1674
+ Worker->>DO: transition created → syncing
1675
+ Worker-->>User: 202 { sessionId, dashboardUrl }
1676
+ Note over Worker: remainder runs in ctx.waitUntil
1677
+
1678
+ Worker->>DO: setSessionMeta { seedTask, vaultEmail }
1679
+ Worker->>Sandbox: mkdir /home/user/workspace (via sandbox.exec)
1680
+ Worker->>Sandbox: gitCheckout repo₁ … repoₙ (parallel)
1681
+ Worker->>DO: transition → cloning (with detail)
1682
+ Worker->>Sandbox: writeFile pending-task.txt
1683
+ Worker->>DO: transition → configuring
1684
+
1685
+ Worker->>Vault: get user:<hash>:claude
1686
+ Vault-->>Worker: stored tokens
1687
+ Worker->>Worker: refreshClaudeTokens (always)
1688
+ Worker->>Vault: put refreshed tokens
1689
+ Worker->>Sandbox: writeFile ~/.claude/.credentials.json
1690
+ Worker->>Sandbox: writeFile ~/.claude/settings.json (hooks + permMode)
1691
+ Worker->>Sandbox: chown -R olam:olam /home/user
1692
+
1693
+ Worker->>DO: transition → warming
1694
+ Worker->>Sandbox: POST /api/session/warmup
1695
+ Sandbox->>Sandbox: tmux new-session -d -s claude-main -x 220 -y 50
1696
+ Sandbox->>Claude: runuser - olam -c 'claude --dangerously-skip-permissions --remote-control'
1697
+ Sandbox->>Sandbox: autoAcceptPrompts (bypass / trust / theme wizards)
1698
+
1699
+ loop Every 3s, up to 90s
1700
+ Worker->>Sandbox: POST /api/session/probe-ready
1701
+ Sandbox->>Sandbox: send nonce via ! echo <nonce> > /tmp/olam-probe-*
1702
+ Sandbox-->>Worker: {ready: true, elapsedMs}
1703
+ end
1704
+
1705
+ Worker->>DO: transition → ready
1706
+ Worker->>Sandbox: GET /api/pending-task → task
1707
+ Worker->>Sandbox: POST /dispatch { prompt: task }
1708
+ Worker->>DO: transition → task_running
1709
+ Worker->>DO: clearPendingTask
1710
+ \`\`\`
1711
+
1712
+ ## The probe
1713
+
1714
+ The \`/api/session/probe-ready\` endpoint is the system's **definition
1715
+ of "ready."** It's not a heartbeat — it actively exercises the
1716
+ capability the rest of the pipeline depends on:
1717
+
1718
+ \`\`\`mermaid
1719
+ flowchart LR
1720
+ S1["tmux has-session"] --> S2["isClaudeResponsive"]
1721
+ S2 --> S3["send-keys Escape + Ctrl+U"]
1722
+ S3 --> S4["send-keys ! echo nonce > /tmp/olam-probe-nonce"]
1723
+ S4 --> S5["send-keys Enter"]
1724
+ S5 --> S6["poll /tmp for nonce file"]
1725
+ S6 -->|match| OK["ready: true"]
1726
+ S6 -->|timeout| FAIL["ready: false (stage: nonce-ack)"]
1727
+
1728
+ style OK fill:#0c0c0f,stroke:#2eaa6f,color:#e4e4e7
1729
+ style FAIL fill:#0c0c0f,stroke:#ef4444,color:#e4e4e7
1730
+ \`\`\`
1731
+
1732
+ Why the nonce approach:
1733
+ - \`tmux has-session\` alone lies — the session can exist but claude be
1734
+ stuck on a wizard.
1735
+ - \`capture-pane | grep ❯\` lies — claude uses \`❯\` as both input cursor
1736
+ and menu cursor.
1737
+ - Actually typing into claude's bash mode and waiting for a file is
1738
+ the **same syscall path** (posix_spawn → /bin/sh) that user
1739
+ commands + hooks use. If the probe succeeds, we know the whole
1740
+ spawn surface works.
1741
+
1742
+ Each probe mints its own nonce + unique file path, so late
1743
+ acknowledgements can never falsely satisfy a future probe (no
1744
+ stale-ack race).
1745
+
1746
+ ## Resume
1747
+
1748
+ CF Sandbox containers can be evicted under idle pressure. When they
1749
+ come back, \`/home/user/.claude/*\` is empty and the claude-main tmux
1750
+ session is gone — but the DO still thinks the world is
1751
+ \`task_running\`.
1752
+
1753
+ \`\`\`mermaid
1754
+ sequenceDiagram
1755
+ autonumber
1756
+ participant Dash as Dashboard
1757
+ participant Worker
1758
+ participant DO
1759
+ participant Sandbox as "Reincarnated container"
1760
+ participant Vault
1761
+
1762
+ Dash->>Sandbox: GET /api/session-health
1763
+ Sandbox-->>Dash: { tmuxAlive: false, claudeRunning: false }
1764
+ Dash->>Dash: useAutoResume detects divergence
1765
+
1766
+ Note over Dash: also triggers when warming stalls > 60s
1767
+
1768
+ Dash->>Worker: POST /session/:id/resume
1769
+ Worker->>DO: read sessionMeta.vaultEmail
1770
+ Worker->>Vault: get + refresh creds
1771
+ Worker->>Sandbox: writeFile .credentials.json
1772
+ Worker->>DO: transition → warming (detail: resume)
1773
+ Worker->>Sandbox: POST /api/session/warmup
1774
+ Worker->>Worker: driveToReadyAndAutoDispatch (fresh waitUntil budget)
1775
+ Worker->>DO: transition → ready → task_running
1776
+ \`\`\`
1777
+
1778
+ The \`useAutoResume\` hook doesn't require user action — navigating back to a stale
1779
+ dashboard is enough. No Auth modal, no re-auth.
1780
+
1781
+ ## Local devbox lifecycle
1782
+
1783
+ The CF flow above is one of two flavors. The local flavor swaps DO +
1784
+ Sandbox for \`WorldManager\` + a docker container, and the SPA's
1785
+ host-cp daemon for the dashboard. The state machine is identical;
1786
+ the actors differ.
1787
+
1788
+ Key boundary: \`host-cp\` does **not** spawn devboxes. The CLI invokes
1789
+ \`WorldManager.createWorld()\` (in \`@olam/core\`) directly on the
1790
+ operator's host, then notifies host-cp so its inbox surfaces the
1791
+ world. host-cp deliberately ships without \`@olam/core\`'s native
1792
+ git/docker/sqlite deps to keep its container slim
1793
+ (\`packages/host-cp/src/server.mjs:610-680\`).
1794
+
1795
+ \`\`\`mermaid
1796
+ sequenceDiagram
1797
+ autonumber
1798
+ participant User
1799
+ participant CLI as "olam create CLI"
1800
+ participant WM as "WorldManager (@olam/core)"
1801
+ participant Docker as "Docker daemon"
1802
+ participant Devbox as "devbox container"
1803
+ participant HostCp as "host-cp daemon"
1804
+ participant Inbox as "SPA inbox"
1805
+
1806
+ User->>CLI: olam create my-world --workspace atlas
1807
+ CLI->>WM: createWorld({ name, repos, workspace, task })
1808
+ WM->>WM: resolve repos · allocate port · pick branch
1809
+ WM->>Docker: docker run olam-devbox:latest (volumes + env)
1810
+ Docker->>Devbox: container starts · CP boots on host port
1811
+ Devbox->>Devbox: git clone repos · inject vault creds
1812
+ Devbox->>Devbox: tmux new-session · spawn \`claude --remote-control\`
1813
+ WM-->>CLI: WorldMetadata { id, dashboardUrl, port }
1814
+
1815
+ Note over CLI: post-create auto-register
1816
+
1817
+ CLI->>HostCp: GET /api/bootstrap (probe + token)
1818
+ HostCp-->>CLI: 200 { token }
1819
+ CLI->>HostCp: POST /api/admin/registry { id, port }
1820
+ HostCp->>HostCp: persist ~/.olam/host-cp-registry.json
1821
+ HostCp->>Inbox: SSE world-added event
1822
+ Inbox-->>User: world card appears · "ready for dispatch"
1823
+ \`\`\`
1824
+
1825
+ If host-cp isn't running the create still succeeds —
1826
+ \`packages/cli/src/commands/create.ts\` falls through to a
1827
+ "World was created but not registered" warning with the manual
1828
+ \`olam host-cp register --world <id>\` remedy. Auto-registration is
1829
+ best-effort; the SQLite world index (\`~/.olam/worlds.db\`) is the
1830
+ source of truth and host-cp reconciles from it on startup.
1831
+
1832
+ Mode auto-detection: host-cp picks \`container\` vs \`bare\` mode by
1833
+ probing \`/.dockerenv\` (\`server.mjs:64-89\`). Container mode reaches
1834
+ per-world CPs via \`host.docker.internal:<port>\`; bare mode uses
1835
+ \`127.0.0.1:<port>\`. The same daemon binary serves both.
1836
+
1837
+ Next: [5 · Completion ladder](./05-completion.md) — the *work*
1838
+ state machine on top of this *operational* state machine.
1839
+
1840
+ ---
1841
+
1842
+ ## CLI command reference
1843
+
1844
+ Top-level commands (run \`olam <command> --help\` for flags and subcommands):
1845
+
1846
+ - \`olam add\` — Register a local repo path
1847
+ - \`olam admin\` — Admin operations (require admin secret)
1848
+ - \`olam aggregate\` — Aggregate plan stats by operator (gate #3 measurement)
1849
+ - \`olam apply\` — Create a world from a runbook (delegates to olam create)
1850
+ - \`olam apply-overlays\` — Merge ~/.claude/skills.overrides/ and ~/.claude/agents.overrides/ over upstream (section-as-unit merge per markdown-merger)
1851
+ - \`olam ask\` — Ask olam about its own usage, setup, and CLI (local Claude subscription)
1852
+ - \`olam audit-log\` — Inspect the manifest-refresh audit log (~/.olam/state/manifest-refresh-audit.jsonl).
1853
+ - \`olam auth\` — Manage the local Claude auth container
1854
+ - \`olam bake\` — Bake a source DB into the singleton as a named seed template
1855
+ - \`olam begin\` — Start the Olam host control plane (alias: olam host-cp start)
1856
+ - \`olam bind-service-token\` — Bind a Cloudflare service token to your CF Access user sub on the remote auth-worker
1857
+ - \`olam bootstrap\` — One-shot wiring of a fresh Hermes install to olam (MCP + KG hook + skill mirror)
1858
+ - \`olam build\` — Build pristine KG for a workspace (default: current dir). Routes through olam-kg-service /build endpoint. Use --pending to drain the pending queue.
1859
+ - \`olam check-ports\` — Check if runbook ports are available
1860
+ - \`olam clean\` — Reap orphan world filesystem state under ~/.olam/worlds/
1861
+ - \`olam completion\` — Emit a POSIX shell completion script for zsh or bash.
1862
+ - \`olam config\` — Manage global olam configuration
1863
+ - \`olam create\` — Create a new development world
1864
+ - \`olam crystallize\` — Crystallize thoughts from a world to Pleri Plane
1865
+ - \`olam deregister\` — Remove a world from the host CP registry (does NOT destroy the world)
1866
+ - \`olam destroy\` — Destroy a world and clean up resources (accepts world ID or name)
1867
+ - \`olam diagnose\` — Bundle diagnostics into a zip file for sharing with maintainers
1868
+ - \`olam diff\` — Show what
1869
+ - \`olam disable\` — Take a credential out of rotation (manual cooldown)
1870
+ - \`olam dispatch\` — Send a prompt to a world for execution
1871
+ - \`olam doctor\` — Run 4 diagnostic probes against the remote auth-worker
1872
+ - \`olam down\` — [deprecated] Stop the auth container — use
1873
+ - \`olam enable\` — Re-enable a disabled credential
1874
+ - \`olam enter\` — Open terminal to a world
1875
+ - \`olam evict\` — Evict oldest snapshots until total size ≤ cap (default 5GB; override via OLAM_SNAPSHOT_MAX_BYTES)
1876
+ - \`olam get\` — Print the active substrate
1877
+ - \`olam hermes\` — Hermes integration commands
1878
+ - \`olam host-cp\` — Manage the Olam host control plane container
1879
+ - \`olam implode\` — Destroy ALL local olam install + configs (containers, images, volumes, ~/.olam/, npm package). Default is dry-run.
1880
+ - \`olam init\` — Initialize olam in the current project
1881
+ - \`olam inspect\` — Diagnose warm-create cache hits/misses for a workspace (read-only; mutates nothing)
1882
+ - \`olam install\` — Pick an archetype preset for this Olam install
1883
+ - \`olam install-hook\` — Install kg-service hook (idempotent). --for hermes targets ~/.hermes/; default targets .claude/settings.json
1884
+ - \`olam issue-anthropic-token\` — Mint a new Anthropic proxy token via the remote auth-worker (g4)
1885
+ - \`olam keys\` — Manage LLM API keys stored at ~/.olam/keys.yaml
1886
+ - \`olam kg\` — Knowledge-graph operations (kg-service container)
1887
+ - \`olam lanes\` — Manage claude-lane-* tmux sessions inside a running world
1888
+ - \`olam list\` — List credentials (local by default; --remote to query a remote auth-worker)
1889
+ - \`olam list-anthropic-tokens\` — List Anthropic proxy tokens from the remote auth-worker (g4)
1890
+ - \`olam login\` — Run the OAuth PKCE flow to store a Claude account in the auth container, or print a remote OAuth URL (--remote)
1891
+ - \`olam logout\` — Remove an account from the auth container
1892
+ - \`olam logs\` — Stream application logs from a world (engine-agnostic)
1893
+ - \`olam migrate-hooks-back\` — Reverse olam-meta hook injection by restoring ~/.claude/settings.json from a B5 snapshot
1894
+ - \`olam migrate-to-remote\` — Print guidance for re-authenticating local credentials against the remote auth-worker (v1: no auto-migration of secrets)
1895
+ - \`olam observe\` — Stream thoughts from a world (coming soon)
1896
+ - \`olam onboard\` — Fresh-install umbrella: register + clone + install SessionStart hook + first sync, in one verb
1897
+ - \`olam path\` — Print the absolute path to ~/.olam/keys.yaml
1898
+ - \`olam plans\` — Manage Olam Cloud plans (list / show / rm / re-register)
1899
+ - \`olam policy-check\` — Check .olam/policies/ against the current diff
1900
+ - \`olam pr\` — Review and decide PR-gate requests from running worlds
1901
+ - \`olam prune\` — Delete shadow-backup files older than a duration (e.g. 30d) OR all of them with --all --force
1902
+ - \`olam ps\` — List running processes in a world container
1903
+ - \`olam pull\` — Fetch + reset the clone to upstream HEAD
1904
+ - \`olam refresh\` — Force-refresh an account token (substrate-aware: updates kubernetes Secret on k8s substrate)
1905
+ - \`olam register\` — Register a world with the running host CP so it appears in the unified UI
1906
+ - \`olam rekey\` — Rotate the per-world postgres password for a hybrid-mode world
1907
+ - \`olam remove\` — Permanently remove a credential (purge tokens)
1908
+ - \`olam reorder\` — Move a registered source to a new ordinal (1-indexed; mutates precedence)
1909
+ - \`olam repos\` — Manage the global repo registry
1910
+ - \`olam restart\` — Restart a world container (auto-builds agent-stream bundle when stale)
1911
+ - \`olam restore\` — Move a shadow-backup file back to its original path
1912
+ - \`olam resume\` — Resume a world by PR number, URL, or branch — finds the world that opened the PR and enters it.
1913
+ - \`olam revoke-anthropic-token\` — Revoke an Anthropic proxy token on the remote auth-worker (g4)
1914
+ - \`olam rotate-service-token\` — Revoke a service token and guide through re-binding a replacement
1915
+ - \`olam runbooks\` — Manage runbooks in the global config
1916
+ - \`olam savings\` — Show cumulative KG-hit savings tallied by the kg-service container
1917
+ - \`olam seed\` — Manage postgres seed templates on the olam-postgres singleton
1918
+ - \`olam services\` — Manage Olam service containers. Substrate-aware: compose uses docker; kubernetes uses kubectl.
1919
+ - \`olam set-prefix\` — Set the deploy prefix for a registered skill source (skills+agents deploy as <prefix>:<canonical-name>)
1920
+ - \`olam set-prefix-scope\` — Set which artifact kinds are renamed by the prefix (comma-separated: skill, agent, or skill,agent)
1921
+ - \`olam setup\` — Fresh-host onboarding wizard. Default substrate=kubernetes (k3d on all platforms):
1922
+ - \`olam setup-linux-gate-status\` — Detect whether the Linux platform expansion gate has been triggered.
1923
+ - \`olam setup-metrics\` — Query trust-audit-log for olam setup dogfood statistics.
1924
+ - \`olam shadow-backups\` — Manage
1925
+ - \`olam show\` — Show full gate detail (diff, command, commits)
1926
+ - \`olam skills\` — Manage skill sources and synchronization
1927
+ - \`olam snapshot\` — Manage world snapshots for fast boot
1928
+ - \`olam source\` — Manage registered skill sources
1929
+ - \`olam start\` — Start the host CP container (token regenerated each call)
1930
+ - \`olam status\` — [deprecated] Show container state — use
1931
+ - \`olam stop\` — Stop the host CP container + remove token + PID files
1932
+ - \`olam substrate\` — Manage deployment substrate (beta)
1933
+ - \`olam sync\` — Sync registered skill sources to ~/.claude/
1934
+ - \`olam uninstall\` — Remove /10x: chain skill symlinks from ~/.claude/skills (preserves user-authored skills + non-chain skill sources)
1935
+ - \`olam uninstall-hook\` — Remove kg-service PreToolUse hook from .claude/settings.json (sentinel-matched; surgical)
1936
+ - \`olam unset-prefix\` — Remove the deploy prefix from a registered skill source (reverts to canonical deploy names)
1937
+ - \`olam unset-prefix-scope\` — Remove the prefix-scope override from a registered skill source (reverts to default: both skill and agent are renamed)
1938
+ - \`olam up\` — [deprecated] Start the auth container — use
1939
+ - \`olam update\` — Update a registered repo
1940
+ - \`olam upgrade\` — Upgrade the olam-auth container. Default: pull olam-auth@<digest> from ghcr.io and recreate.
1941
+ - \`olam version\` — Show olam-cli version + registered chain-skill source identities
1942
+ - \`olam watch\` — Run graphify --watch against a workspace, keeping its pristine KG fresh
1943
+ - \`olam workspace\` — Manage the named catalog of repo bundles that worlds instantiate from
1944
+ - \`olam world\` — World management subcommands
1945
+ - \`olam yolo\` — Dispatch a parallel Claude Code session in a new tmux window + isolated git worktree
1946
+ `;
1947
+ //# sourceMappingURL=knowledge-pack.generated.js.map