loopat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +194 -0
  3. package/bin/loopat.mjs +65 -0
  4. package/package.json +52 -0
  5. package/server/package.json +22 -0
  6. package/server/src/api-tokens.ts +161 -0
  7. package/server/src/api-v1-openapi.ts +363 -0
  8. package/server/src/api-v1.ts +681 -0
  9. package/server/src/auth.ts +309 -0
  10. package/server/src/bootstrap.ts +113 -0
  11. package/server/src/chat.ts +390 -0
  12. package/server/src/claude-binary.ts +68 -0
  13. package/server/src/compose.ts +474 -0
  14. package/server/src/config.ts +783 -0
  15. package/server/src/files.ts +173 -0
  16. package/server/src/git-crypt-key.ts +36 -0
  17. package/server/src/git-host.ts +104 -0
  18. package/server/src/github.ts +161 -0
  19. package/server/src/index.ts +3204 -0
  20. package/server/src/kanban.ts +810 -0
  21. package/server/src/loop-stats.ts +225 -0
  22. package/server/src/loop-status.ts +67 -0
  23. package/server/src/loops.ts +1832 -0
  24. package/server/src/mcp-oauth.ts +516 -0
  25. package/server/src/onboarding.ts +105 -0
  26. package/server/src/paths.ts +190 -0
  27. package/server/src/personal-keys.ts +60 -0
  28. package/server/src/plugin-installer.ts +287 -0
  29. package/server/src/podman.ts +1216 -0
  30. package/server/src/presets.ts +30 -0
  31. package/server/src/profiles.ts +177 -0
  32. package/server/src/providers.ts +45 -0
  33. package/server/src/serve.ts +275 -0
  34. package/server/src/session.ts +1496 -0
  35. package/server/src/system-prompt.ts +90 -0
  36. package/server/src/term.ts +211 -0
  37. package/server/src/tiers.ts +762 -0
  38. package/server/src/vaults.ts +189 -0
  39. package/server/src/workspace.ts +501 -0
  40. package/server/templates/.claude-plugin/marketplace.json +13 -0
  41. package/server/templates/CLAUDE.md +78 -0
  42. package/server/templates/loop-kinds/distill/CLAUDE.md +46 -0
  43. package/server/templates/plugins/loopat/.claude-plugin/plugin.json +5 -0
  44. package/server/templates/plugins/loopat/skills/onboarding/SKILL.md +266 -0
  45. package/server/templates/plugins/loopat/skills/promote/SKILL.md +53 -0
  46. package/server/templates/sandbox/Containerfile +113 -0
  47. package/web/dist/assets/CodeEditor-BGODueTo.js +49 -0
  48. package/web/dist/assets/Editor-DMS25Vve.js +1 -0
  49. package/web/dist/assets/Markdown-CnHbW7WK.js +5 -0
  50. package/web/dist/assets/MilkdownEditor-nqo9_0v5.js +123 -0
  51. package/web/dist/assets/Terminal-BrP-ENHg.css +1 -0
  52. package/web/dist/assets/Terminal-CYWvxYam.js +174 -0
  53. package/web/dist/assets/index-DM5eO-Tv.js +163 -0
  54. package/web/dist/assets/index-DxIFezwv.css +1 -0
  55. package/web/dist/assets/w3c-keyname-BOAvb0qz.js +1 -0
  56. package/web/dist/favicon.svg +1 -0
  57. package/web/dist/index.html +14 -0
  58. package/web/dist/logo.png +0 -0
@@ -0,0 +1,1216 @@
1
+ /**
2
+ * Podman-based sandbox: one long-lived rootless container per loop. Both SDK
3
+ * CLI and PTY bash run inside the same container via `podman exec`, so they
4
+ * share PID / Mount / IPC namespaces — the terminal can `ps` and see what the
5
+ * AI is running, and vice versa. Idle → `podman stop` → kernel reaps the
6
+ * namespace.
7
+ *
8
+ * Naming note: this module is internal — "podman" is the implementation
9
+ * mechanism. User-facing concept stays "sandbox" (see docs/sandbox.md).
10
+ *
11
+ * Key decisions:
12
+ * - Base image is `loopat-sandbox:latest`, built locally on first run from
13
+ * server/templates/sandbox/Containerfile (FROM ubuntu:24.04 + bash +
14
+ * coreutils + util-linux + procps + less). Keeps the image small + boring
15
+ * — every "heavy" tool (claude binary, node, mise, host caches) is bound
16
+ * in from the host at container-create time via --volume. Glibc inside
17
+ * the image matches the host (both Ubuntu 24.04 lineage), so host-built
18
+ * binaries Just Work.
19
+ * - Shared `loopat` bridge network (`--network loopat`): loop containers are
20
+ * reachable by name (aardvark-dns); outbound API calls go out via NAT.
21
+ * - --userns=keep-id:uid=2000,gid=2000: the host uid running podman is mapped
22
+ * to the fixed in-container uid 2000 (`loopat`), so files the AI creates
23
+ * through bind mounts are owned by the user on the host too. Rootless subuid/subgid mappings (see /etc/subuid) make this work.
24
+ * - --init: podman auto-injects catatonit (or tini) as PID 1 so zombies
25
+ * from orphaned background processes get reaped.
26
+ * - Long-lived container with `sleep infinity` as the main command. Both
27
+ * SDK and PTY are `podman exec` siblings of this.
28
+ *
29
+ * Two mount-authority tiers (same model as bwrap):
30
+ * - operator: ~/.dashscope/config.json `mounts` (any host path)
31
+ * - member: convention-based via `vaults/<v>/mounts/home/<rel>/...` → $HOME/<rel>/...
32
+ * - admin: no mount capability
33
+ *
34
+ * See memory: project_loop_dir_is_sandbox.md
35
+ */
36
+ import { execFile, spawn } from "node:child_process"
37
+ import { createHash } from "node:crypto"
38
+ import { existsSync } from "node:fs"
39
+ import { copyFile, mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"
40
+ import { homedir, tmpdir } from "node:os"
41
+ import { join } from "node:path"
42
+ import { promisify } from "node:util"
43
+ import {
44
+ WORKSPACE,
45
+ loopWorkdir,
46
+ loopClaudeDir,
47
+ loopsDir,
48
+ loopContextChatDir,
49
+ workspaceKnowledgeDir,
50
+ workspaceNotesDir,
51
+ workspaceReposDir,
52
+ loopContextKnowledge,
53
+ loopContextNotes,
54
+ personalDir,
55
+ LOOPAT_INSTALL_DIR,
56
+ loopHomeUpper,
57
+ workspaceHomeSkelDir,
58
+ loopDir,
59
+ } from "./paths"
60
+ import { loadConfig } from "./config"
61
+ import { DEFAULT_VAULT, listVaultHomeMounts } from "./vaults"
62
+
63
+ const execFileP = promisify(execFile)
64
+
65
// ── Virtual paths (kept identical to bwrap era so AI doctrine still applies) ──

/** Sandbox-side root directory of one loop. */
export const V_LOOP = (id: string) => "/loopat/loop/" + id
/** Sandbox-side working directory of one loop (nested under its V_LOOP root). */
export const V_LOOP_WORKDIR = (id: string) => `${V_LOOP(id)}/workdir`
/** Sandbox-side `.claude` directory of one loop (nested under its V_LOOP root). */
export const V_LOOP_CLAUDE = (id: string) => `${V_LOOP(id)}/.claude`

/** Mount point for the view over every loop. */
export const V_ALL_LOOPS = "/loopat/loops"

// Context mounts. These stay plain string literals so their literal types
// are preserved for any consumer that relies on them.
export const V_CONTEXT_KNOWLEDGE = "/loopat/context/knowledge"
export const V_CONTEXT_NOTES = "/loopat/context/notes"
export const V_CONTEXT_NOTES_MEMORY = "/loopat/context/notes/memory"
export const V_CONTEXT_PERSONAL = "/loopat/context/personal"
export const V_CONTEXT_PERSONAL_MEMORY = "/loopat/context/personal/memory"
export const V_CONTEXT_REPOS = "/loopat/context/repos"
export const V_CONTEXT_CHAT = "/loopat/context/chat"
77
+
78
/**
 * $HOME inside the container, per user.
 *
 * This is deliberately NOT the host's homedir. Binding the host $HOME at its
 * real path would make podman auto-create parent dirs for every nested bind
 * (LOOPAT_HOME, LOOPAT_INSTALL_DIR, etc. all live under the host $HOME in
 * typical installs), and those intermediate dirs end up owned by a subuid
 * the user cannot delete from the host. Keeping $HOME under /loopat/ —
 * outside the host's homedir tree — means every host-absolute bind sits
 * beside it, never inside it.
 */
export const V_HOME = (user: string) => "/loopat/home/" + user
86
+
87
// Container label keys (readable back through `podman inspect`).
const LABEL_LOOP = "loopat.loop-id"
const LABEL_WORKSPACE = "loopat.workspace"
const LABEL_CONFIG_HASH = "loopat.config-hash"

/**
 * Base image for every loop container. Built locally from
 * server/templates/sandbox/Containerfile via ensureSandboxImage().
 * `||` (not `??`) is used so that an empty LOOPAT_SANDBOX_IMAGE also falls
 * back to the default tag.
 */
export const SANDBOX_IMAGE = process.env.LOOPAT_SANDBOX_IMAGE || "loopat-sandbox:latest"
95
+
96
/**
 * Podman container name for a loop.
 *
 * The workspace is part of the name so that several loopat instances on the
 * same host (different LOOPAT_HOME) never collide. Loop UUIDs are already
 * globally unique; the prefix mostly helps a human grepping `podman ps`.
 */
export function containerName(loopId: string): string {
  const prefix = "loopat-" + WORKSPACE
  return prefix + "-" + loopId
}
102
+
103
/** Everything needed to describe (and create) one loop's container. */
export type ContainerOptions = {
  /** Id of the loop the container belongs to. */
  loopId: string
  /** User the loop was created by; selects the sandbox $HOME and personal dir. */
  createdBy: string
  /** Vault whose home mounts are applied; the default vault is used when unset or blank. */
  vaultName?: string
  /** When true the knowledge binds are read-write instead of read-only. */
  knowledgeRw?: boolean
  /** When true the read-only view over all loops is mounted. */
  mountAllLoops?: boolean
  /** Extra env vars to pre-bake into the container at create time. */
  extraEnv?: Record<string, string>
  /**
   * Image to create the container from. Defaults to SANDBOX_IMAGE.
   * Production callers resolve a per-loop child via ensureLoopImage; tests
   * may omit this and get the base.
   */
  image?: string
  /**
   * Ephemeral port publishing: when set, the container is created with
   * `-p :<internalPort>[/<proto>]` so the kernel assigns a random host
   * port. Host port is queried via `podman port` after start. Changing
   * this list shifts the config hash → container recreate.
   */
  ephemeralPorts?: Array<{ internalPort: number; protocol?: "tcp" | "udp" }>
}
121
+
122
/**
 * Resolve a sandbox-side path. A leading `~` or `$HOME` (alone, or followed
 * by `/`) is replaced with `virtualHome` — the sandbox's virtual home, NOT
 * the host's homedir. Anything else is returned unchanged.
 *
 * @param p           path as written in config
 * @param virtualHome sandbox home directory to substitute
 * @returns the expanded path
 */
function expandSandboxPath(p: string, virtualHome: string): string {
  for (const marker of ["~", "$HOME"]) {
    if (p === marker) return virtualHome
    // Keep the "/" that follows the marker: slice off the marker only.
    if (p.startsWith(marker + "/")) return virtualHome + p.slice(marker.length)
  }
  return p
}
134
+
135
/**
 * Resolve a host-side path. A leading `~` or `$HOME` (alone, or followed by
 * `/`) is replaced with `hostHome`; anything else is returned unchanged.
 *
 * @param p        path as written in config
 * @param hostHome host home directory to substitute
 * @returns the expanded path
 */
function expandHostPath(p: string, hostHome: string): string {
  const isBareHome = p === "~" || p === "$HOME"
  if (isBareHome) return hostHome

  const tail = p.startsWith("~/")
    ? p.slice(1)
    : p.startsWith("$HOME/")
      ? p.slice("$HOME".length)
      : null
  return tail === null ? p : hostHome + tail
}
141
+
142
/**
 * Type guard for the `src` of an operator-tier mount: a non-empty string
 * that is home-relative (`~`, `$HOME`, `~/…`, `$HOME/…`) or absolute, and
 * that contains no `..` path segment.
 */
function isValidOperatorMountSrc(s: unknown): s is string {
  if (typeof s !== "string" || s.length === 0) return false
  const path: string = s

  const anchored =
    path === "~" ||
    path === "$HOME" ||
    ["~/", "$HOME/", "/"].some((prefix) => path.startsWith(prefix))
  if (!anchored) return false

  // Reject any parent-directory segment.
  return !path.split("/").includes("..")
}
149
+
150
/**
 * Type guard for the `dst` of a mount: a non-empty string that is either
 * home-relative (`~`, `$HOME`, `~/…`, `$HOME/…`) or absolute.
 */
function isValidMountDst(s: unknown): s is string {
  if (typeof s !== "string" || s.length === 0) return false
  const path: string = s
  if (path === "~" || path === "$HOME") return true
  return ["~/", "$HOME/", "/"].some((prefix) => path.startsWith(prefix))
}
154
+
155
/** One bind mount, rendered later as a `podman create --volume` argument. */
export type VolumeMount = {
  /** Source path of the bind. */
  src: string
  /** Destination path inside the container. */
  dst: string
  /** true = read-only; false = read-write (default). */
  ro?: boolean
}
161
+
162
/**
 * Compute the bind-mount list for `podman create`. This is the same logical
 * bind set the bwrap era built, expressed as podman --volume pairs.
 *
 * Async because it loads the workspace config and probes the filesystem for
 * existence-conditional binds; it performs NO container I/O.
 *
 * @param opts loop / user / vault selection plus the knowledge and all-loops flags
 * @returns mounts in the order they should be passed to podman
 */
export async function buildVolumeMounts(opts: ContainerOptions): Promise<VolumeMount[]> {
  const { loopId, createdBy, vaultName, knowledgeRw, mountAllLoops } = opts
  const hostHome = homedir()
  const virtualHome = V_HOME(createdBy)

  // Unconditional binds, in order.
  const mounts: VolumeMount[] = [
    // /tmp: shared with host (for socat / mktemp / IPC sockets).
    { src: "/tmp", dst: "/tmp" },
    // $HOME: per-loop upper layer, persistent across container restarts.
    // Lives under /loopat/home/<user> rather than the host's real homedir so
    // nothing nests under it (see the V_HOME comment for why).
    { src: loopHomeUpper(loopId), dst: virtualHome },
    // Virtual mount points for AI / user.
    { src: loopWorkdir(loopId), dst: V_LOOP_WORKDIR(loopId) },
    { src: loopClaudeDir(loopId), dst: V_LOOP_CLAUDE(loopId) },
    { src: loopContextKnowledge(loopId), dst: V_CONTEXT_KNOWLEDGE, ro: !knowledgeRw },
    { src: loopContextNotes(loopId), dst: V_CONTEXT_NOTES },
    { src: personalDir(createdBy), dst: V_CONTEXT_PERSONAL },
    // Personal again at its host-absolute path: compose.ts creates symlinks
    // under loops/<id>/.claude/skills/<name> whose targets are host-absolute
    // paths into personalDir(user); without this bind they would dangle
    // inside the container.
    { src: personalDir(createdBy), dst: personalDir(createdBy) },
    // LOOPAT_INSTALL_DIR ro (claude binary + builtin plugins).
    { src: LOOPAT_INSTALL_DIR, dst: LOOPAT_INSTALL_DIR, ro: true },
  ]

  // Host ~/.claude/plugins/ ro-bound at the sandbox $HOME's analogue so the
  // SDK's plugin resolution (which reads ~/.claude/plugins/) sees the same
  // set the host has.
  const hostUserPluginsDir = join(hostHome, ".claude", "plugins")
  const sandboxUserPluginsDir = join(virtualHome, ".claude", "plugins")
  if (existsSync(hostUserPluginsDir)) {
    mounts.push({ src: hostUserPluginsDir, dst: sandboxUserPluginsDir, ro: true })
  }

  // Per-loop installed_plugins.json snapshot (if compose wrote one): a
  // file-level bind layered OVER the wholesale dir bind. podman --volume
  // supports file binds.
  const loopInstalledPlugins = join(loopClaudeDir(loopId), "plugins", "installed_plugins.json")
  if (existsSync(loopInstalledPlugins)) {
    const dst = join(sandboxUserPluginsDir, "installed_plugins.json")
    mounts.push({ src: loopInstalledPlugins, dst, ro: true })
  }

  // Repos: bound at the virtual path AND the host-absolute path (git worktree
  // internals store absolute gitdir paths). Both RW.
  const reposDir = workspaceReposDir()
  if (existsSync(reposDir)) {
    mounts.push({ src: reposDir, dst: V_CONTEXT_REPOS }, { src: reposDir, dst: reposDir })
  }

  // notes/knowledge main repos: bound at host-absolute paths so per-loop
  // worktree `.git` files resolve.
  const notesRepo = workspaceNotesDir()
  if (existsSync(notesRepo)) {
    mounts.push({ src: notesRepo, dst: notesRepo })
  }
  const knowledgeRepo = workspaceKnowledgeDir()
  if (existsSync(knowledgeRepo)) {
    mounts.push({ src: knowledgeRepo, dst: knowledgeRepo, ro: !knowledgeRw })
  }

  // Chat snapshots (per-loop, ro). Only mounted when the directory exists.
  const chatDir = loopContextChatDir(loopId)
  if (existsSync(chatDir)) {
    mounts.push({ src: chatDir, dst: V_CONTEXT_CHAT, ro: true })
  }

  // All-loops ro view (admin-gated): expose LOOPAT_HOME/loops/ at /loopat/loops.
  if (mountAllLoops) {
    mounts.push({ src: loopsDir(), dst: V_ALL_LOOPS, ro: true })
  }

  // Operator-tier mounts from the workspace config `mounts`. Any host path is
  // fair game; the operator owns the host. src is a host path (expanded
  // against the host home), dst is a sandbox path (expanded against the
  // virtual home).
  const workspaceCfg = await loadConfig()
  for (const entry of workspaceCfg.mounts ?? []) {
    const wellFormed = isValidOperatorMountSrc(entry.src) && isValidMountDst(entry.dst)
    if (!wellFormed) {
      console.warn(`[loopat] skipping invalid workspace mount ${JSON.stringify(entry)}`)
      continue
    }
    const src = expandHostPath(entry.src, hostHome)
    const dst = expandSandboxPath(entry.dst, virtualHome)
    // bind-try semantics: a missing source is skipped silently.
    if (existsSync(src)) {
      mounts.push({ src, dst, ro: !entry.rw })
    }
  }

  // Member-tier vault mounts: vaults/<v>/mounts/home/<top> → $HOME/<top>.
  const vault = vaultName?.trim() || DEFAULT_VAULT
  for (const vm of listVaultHomeMounts(createdBy, vault)) {
    if (existsSync(vm.src)) {
      mounts.push({ src: vm.src, dst: join(virtualHome, vm.rel) })
    }
  }

  // No mise bind — toolchains are baked into the per-loop image instead
  // (see ensureLoopImage). The image's MISE_DATA_DIR=/opt/loopat-mise lives
  // outside $HOME so the home-upper overlay can't shadow installed tools.

  return mounts
}
283
+
284
/**
 * Produce the env-var map baked into the container at create time.
 *
 * HOME is set to the sandbox home (/loopat/home/<user>, see the V_HOME
 * comment) and `opts.extraEnv` is then layered on top, so a caller-supplied
 * key wins over the default.
 *
 * The mise PATH is NOT set here: the IMAGE provides it (ENV directives in
 * base + per-loop child), so the toolchain works for any process inside the
 * container without host-side env extraction.
 */
export async function buildContainerEnv(opts: ContainerOptions): Promise<Record<string, string>> {
  const env: Record<string, string> = { HOME: V_HOME(opts.createdBy) }
  // Object.assign skips an undefined source, matching the "no extraEnv" case.
  return Object.assign(env, opts.extraEnv)
}
300
+
301
/**
 * Build the `podman create` argv (everything after "podman create"). The
 * container is named and labeled with the loop id, the workspace, and a
 * config-hash so spec drift can be detected and the container recreated.
 *
 * The image name comes from `opts.image` when provided (typically the
 * per-loop child image from ensureLoopImage); otherwise it defaults to the
 * base SANDBOX_IMAGE. Callers in the production path (ensureContainer)
 * always resolve via ensureLoopImage; tests that construct opts directly
 * get the base image without a build step.
 *
 * @param opts container description (loop, user, mounts flags, env, image, ports)
 * @returns argv: flags first, then the image, then the `/bin/sleep infinity` command
 */
export async function buildPodmanCreateArgs(opts: ContainerOptions): Promise<string[]> {
  const mounts = await buildVolumeMounts(opts)
  const env = await buildContainerEnv(opts)

  const args: string[] = [
    "--name", containerName(opts.loopId),
    "--label", `${LABEL_LOOP}=${opts.loopId}`,
    "--label", `${LABEL_WORKSPACE}=${WORKSPACE}`,
    // --userns=keep-id:uid=2000,gid=2000 maps whatever uid is running
    // podman on the host → fixed container uid 2000. The image places
    // the `loopat` user at uid 2000, so `whoami` inside is always
    // "loopat" regardless of which host user owns the rootless daemon.
    //
    // File ownership across the boundary: container loopat ↔ host caller.
    // Files we write through bind mounts are owned by the host user (the
    // person who launched loopat), so they can manage them normally.
    //
    // Why not "USER root" instead: claude CLI refuses to run with
    // --dangerously-skip-permissions when uid == 0. loopat sandboxes use
    // bypassPermissions by default, so container-root is untenable for
    // the SDK driver.
    "--userns", "keep-id:uid=2000,gid=2000",
    // Init reaps zombies from orphaned bg processes.
    "--init",
    // Nested rootless podman: every sandbox can run podman without a
    // per-loop opt-in. --privileged is the only sustainable choice — a
    // precise cap set ends up chasing one new boundary per podman release
    // (NET_RAW for slirp, unmask for ro sysctls, ...). Tradeoff: outer
    // container loses kernel isolation, but the userns + bind-mount
    // boundary (uid 2000 ↔ host caller via keep-id) still constrains
    // host damage. Sandbox doctrine here is "containerized dev env",
    // not "untrusted-code prison". /dev/fuse is for the future switch
    // to fuse-overlayfs storage if vfs ever bites on disk pressure.
    "--privileged",
    "--device", "/dev/fuse",
    // Shared bridge network so the serve container can reach loop
    // containers by name (aardvark-dns). Outbound API calls via NAT.
    "--network", "loopat",
    "--hostname", `loop-${opts.loopId.slice(0, 8)}`,
    // Container cwd at creation; per-exec we override with -w.
    "--workdir", V_LOOP_WORKDIR(opts.loopId),
    // No interactive stdin / tty on the main process — it's just a sleeper.
  ]

  // Volumes: src:dst with an optional :ro suffix.
  for (const m of mounts) {
    args.push("--volume", `${m.src}:${m.dst}${m.ro ? ":ro" : ""}`)
  }

  // Env.
  for (const [k, v] of Object.entries(env)) {
    args.push("--env", `${k}=${v}`)
  }

  // Ephemeral port publish. `-p :<inner>` tells podman to ask the kernel
  // for any free host port; query `podman port` after start to learn
  // which one. Different from the port-proxy path (which publishes the
  // whole range up front from a separate container) — here each loop
  // container directly owns its share port mapping.
  for (const ep of opts.ephemeralPorts ?? []) {
    const proto = ep.protocol === "udp" ? "/udp" : ""
    args.push("-p", `:${ep.internalPort}${proto}`)
  }

  // Config hash. Covers mounts + opts but NOT env — see hashCreateArgs
  // doc for why.
  const hash = hashCreateArgs(mounts, opts)
  args.push("--label", `${LABEL_CONFIG_HASH}=${hash}`)

  // Image + command tail. The image's CMD already runs `sleep infinity`, but
  // we pass it explicitly so a future image-CMD change can't accidentally
  // break the long-lived semantic.
  const image = opts.image ?? SANDBOX_IMAGE
  args.push(image, "/bin/sleep", "infinity")
  return args
}
389
+
390
/**
 * Config hash over everything that, if changed, requires recreating the
 * container: the loop-scoped opts, the mounts (ordered by destination) and
 * the ephemeral port set (ordered by internal port).
 *
 * The env map is deliberately EXCLUDED. Different callers (term.ts /
 * session.ts) legitimately pass different extraEnv (PTY doesn't need
 * ANTHROPIC_API_KEY; SDK does). Hashing env would make those callers
 * force-recreate the container on every activity flip, killing each other's
 * exec'd processes with SIGKILL (the actual bug behind "PTY exits 137 the
 * moment a chat starts").
 *
 * Env still lands in `podman create --env` for convenience (so an exec
 * without explicit env inherits something sane), but the values that
 * actually matter at runtime should be passed at exec time anyway.
 *
 * @returns the first 16 hex characters of the SHA-256 digest
 */
function hashCreateArgs(
  mounts: VolumeMount[],
  opts: ContainerOptions,
): string {
  const flag = (on: boolean | undefined) => (on ? "1" : "0")

  const records: string[] = [
    "v1\n",
    `loop:${opts.loopId}\n`,
    `createdBy:${opts.createdBy}\n`,
    `vault:${opts.vaultName ?? ""}\n`,
    `knowledgeRw:${flag(opts.knowledgeRw)}\n`,
    `mountAllLoops:${flag(opts.mountAllLoops)}\n`,
  ]

  const byDst = [...mounts].sort((a, b) => a.dst.localeCompare(b.dst))
  for (const vol of byDst) {
    records.push(`vol\t${vol.src}\t${vol.dst}\t${vol.ro ? "ro" : "rw"}\n`)
  }

  // Ephemeral port set is part of create-args — must invalidate hash so
  // toggling share rebuilds the container with new `-p` flags.
  const byPort = [...(opts.ephemeralPorts ?? [])].sort((a, b) => a.internalPort - b.internalPort)
  for (const port of byPort) {
    records.push(`epport\t${port.internalPort}\t${port.protocol ?? "tcp"}\n`)
  }

  // Feeding the concatenation once yields the same digest as incremental updates.
  return createHash("sha256").update(records.join("")).digest("hex").slice(0, 16)
}
424
+
425
// ── podman binary wrapping ────────────────────────────────────────────────

const PODMAN_BIN = process.env.LOOPAT_PODMAN_BIN || "podman"

/**
 * Reassemble newline-delimited output from arbitrarily chunked stream data.
 * `push` buffers any trailing partial line until its newline arrives;
 * `flush` emits whatever is still buffered (an unterminated last line).
 * Lines are trimmed and blank lines are never forwarded.
 */
function createLineSplitter(onLine: (line: string) => void): {
  push: (chunk: string) => void
  flush: () => void
} {
  let pending = ""
  const emit = (raw: string) => {
    const trimmed = raw.trim()
    if (trimmed) onLine(trimmed)
  }
  return {
    push(chunk: string) {
      const parts = (pending + chunk).split("\n")
      // The last element is the (possibly empty) incomplete tail.
      pending = parts.pop() ?? ""
      for (const part of parts) emit(part)
    },
    flush() {
      const rest = pending
      pending = ""
      emit(rest)
    },
  }
}

/**
 * Run the podman binary with `args`, capturing stdout and stderr.
 *
 * @param args           argv passed to podman (args[0] is used in error messages)
 * @param opts.allowFail resolve instead of rejecting on a non-zero exit code
 * @param opts.onLine    called with every non-blank, trimmed line from stdout or stderr
 * @returns captured output and exit code (-1 when the process ended without one)
 * @throws Error with an install hint when the binary is missing; otherwise,
 *         on non-zero exit without `allowFail`, an Error carrying `.result`
 */
async function runPodman(
  args: string[],
  opts: { allowFail?: boolean; onLine?: (line: string) => void } = {},
): Promise<{ stdout: string, stderr: string, code: number }> {
  return new Promise((resolve, reject) => {
    const child = spawn(PODMAN_BIN, args, { stdio: ["ignore", "pipe", "pipe"] })
    let stdout = ""
    let stderr = ""
    const forward = (line: string) => {
      opts.onLine?.(line)
    }
    // One splitter per stream so partial lines from stdout and stderr
    // never get glued together.
    const outLines = createLineSplitter(forward)
    const errLines = createLineSplitter(forward)

    // Decode in the stream so multi-byte UTF-8 sequences split across
    // chunks are not corrupted.
    child.stdout.setEncoding("utf8")
    child.stderr.setEncoding("utf8")
    child.stdout.on("data", (s: string) => {
      stdout += s
      outLines.push(s)
    })
    child.stderr.on("data", (s: string) => {
      stderr += s
      errLines.push(s)
    })
    child.on("error", (e: unknown) => {
      if ((e as { code?: unknown } | null)?.code === "ENOENT") {
        reject(new Error(`podman binary not found (looked for "${PODMAN_BIN}"); install with: sudo apt install podman uidmap fuse-overlayfs`))
      } else {
        reject(e)
      }
    })
    // "close" (not "exit") fires only after the stdio streams have ended,
    // so the captured output is complete when we settle.
    child.on("close", (code: number | null) => {
      outLines.flush()
      errLines.flush()
      const result = { stdout, stderr, code: code ?? -1 }
      if (code === 0 || opts.allowFail) {
        resolve(result)
      } else {
        const err: Error & { result?: typeof result } = new Error(
          `podman ${args[0]} failed (exit ${code}): ${stderr.trim() || stdout.trim()}`,
        )
        err.result = result
        reject(err)
      }
    })
  })
}
472
+
473
/** Outcome of probing for a usable podman binary (see probePodman). */
export type PodmanProbeResult = {
  /** true when `podman --version` ran successfully. */
  ok: boolean
  /** Trimmed `podman --version` output; set on success. */
  version?: string
  /** Human-readable install or failure hint; set on failure. */
  hint?: string
}
478
+
479
/**
 * Check that podman can be executed by running `podman --version`.
 * Never throws: a failure is reported as `{ ok: false, hint }`, where the
 * hint is the install command when the error message contains "not found"
 * and the raw failure message otherwise.
 */
export async function probePodman(): Promise<PodmanProbeResult> {
  let stdout: string
  try {
    ;({ stdout } = await runPodman(["--version"]))
  } catch (e: any) {
    const missing = e?.message?.includes("not found")
    const hint = missing
      ? "install with: sudo apt install podman uidmap fuse-overlayfs"
      : `podman probe failed: ${e?.message ?? e}`
    return { ok: false, hint }
  }
  return { ok: true, version: stdout.trim() }
}
493
+
494
+ /**
495
+ * Ensure the loopat-sandbox base image exists in podman's local store. If
496
+ * missing, build it from server/templates/sandbox/Containerfile. The
497
+ * Containerfile is FROM ubuntu:24.04 + apt-installs basic shell tools; the
498
+ * first build pulls ubuntu:24.04 from docker.io (~78MB), subsequent
499
+ * `ensureContainer` calls reuse the cached image.
500
+ *
501
+ * Concurrency: build is idempotent at podman's layer cache, but we still
502
+ * guard with a per-process Promise so two simultaneous ensureContainer
503
+ * calls don't fire two builds.
504
+ */
505
/**
 * Hash the content of the base Containerfile
 * (LOOPAT_INSTALL_DIR/server/templates/sandbox/Containerfile).
 *
 * Used both as the tag suffix for the base image itself and mixed into
 * per-loop child image tags, so that base-image changes (e.g. apt installs
 * added to the Containerfile) cascade through and invalidate stale child
 * images.
 *
 * @returns the first 16 hex characters of the SHA-256 of the file content
 * @throws Error when the Containerfile does not exist
 */
export async function baseContainerfileHash(): Promise<string> {
  const containerfile = join(LOOPAT_INSTALL_DIR, "server", "templates", "sandbox", "Containerfile")
  if (!existsSync(containerfile)) {
    throw new Error(`Containerfile not found at ${containerfile}`)
  }
  const hasher = createHash("sha256")
  hasher.update(await readFile(containerfile, "utf8"))
  const digest = hasher.digest("hex")
  return digest.slice(0, 16)
}
518
+
519
// In-flight build shared by concurrent ensureSandboxImage callers; reset to
// null once the build settles.
let _imageBuildInFlight: Promise<void> | null = null

/**
 * Ensure the sandbox base image exists in podman's local store, building it
 * from server/templates/sandbox/Containerfile when it is missing.
 *
 * The image is tagged both as SANDBOX_IMAGE and as a tag derived from the
 * Containerfile hash, so editing the Containerfile triggers a rebuild. When
 * the hash-tagged image is already present it is only re-tagged as
 * SANDBOX_IMAGE.
 *
 * Concurrency: simultaneous callers in this process share one build through
 * `_imageBuildInFlight`; only the caller that started the build receives
 * `onProgress` updates.
 *
 * @param opts.onProgress receives short human-readable build progress messages
 * @throws Error when the Containerfile is missing, or (from runPodman) when
 *         the build exits non-zero
 */
export async function ensureSandboxImage(opts?: { onProgress?: (msg: string) => void }): Promise<void> {
  if (_imageBuildInFlight) return _imageBuildInFlight
  _imageBuildInFlight = (async () => {
    const buildDir = join(LOOPAT_INSTALL_DIR, "server", "templates", "sandbox")
    const containerfile = join(buildDir, "Containerfile")
    if (!existsSync(containerfile)) {
      throw new Error(`Cannot build sandbox image: Containerfile not found at ${containerfile}`)
    }

    // Hash the Containerfile so the base image auto-rebuilds when it changes.
    const hash = await baseContainerfileHash()
    const hashTag = `loopat-sandbox-${hash}:latest`

    const present = await runPodman(["image", "exists", hashTag], { allowFail: true })
    if (present.code === 0) {
      // Re-tag so the unversioned SANDBOX_IMAGE name always points at the
      // latest built version. Best-effort: a tag failure is tolerated here.
      await runPodman(["tag", hashTag, SANDBOX_IMAGE], { allowFail: true })
      return
    }

    console.log(`[podman] building sandbox image ${SANDBOX_IMAGE} (Containerfile changed or first run; may take ~30s)`)
    opts?.onProgress?.("Building sandbox environment…")

    // Stream build output, turning STEP lines into progress messages.
    // No exit-code check is needed afterwards: without `allowFail`,
    // runPodman rejects on any non-zero exit.
    await runPodman(
      ["build", "-t", SANDBOX_IMAGE, "-t", hashTag, "-f", containerfile, buildDir],
      {
        onLine: (line) => {
          const m = line.match(/^STEP\s+(\d+)\/(\d+):\s+(.+)/)
          if (m) {
            opts?.onProgress?.(`Building sandbox: ${descStep(m[3])} (step ${m[1]}/${m[2]})`)
          }
        },
      },
    )
    console.log(`[podman] sandbox image ready`)
  })()
  try {
    await _imageBuildInFlight
  } finally {
    // Clear the slot whether the build succeeded or failed so a later call
    // can retry.
    _imageBuildInFlight = null
  }
}
569
+
570
/**
 * Map a podman build STEP instruction to a short human-readable label.
 * Matching is case-insensitive and the first matching rule wins, so the
 * specific checks (apt-get, user management, mise) come before the generic
 * per-instruction ones. Unrecognized instructions yield "building".
 */
function descStep(instruction: string): string {
  const text = instruction.toLowerCase()
  const has = (needle: string) => text.includes(needle)
  const begins = (prefix: string) => text.startsWith(prefix)

  const rules: Array<[matches: () => boolean, label: string]> = [
    [() => begins("from "), "base image"],
    [() => has("apt-get"), "system packages"],
    [() => has("userdel") || has("useradd") || has("groupadd"), "user setup"],
    [() => has("curl") && has("mise"), "mise tool manager"],
    [() => has("mkdir") && has("loopat-mise"), "mise directories"],
    [() => begins("env "), "environment"],
    [() => begins("user "), "user"],
    [() => begins("copy "), "copying config"],
    [() => begins("run "), "running setup"],
    [() => begins("cmd "), "entrypoint"],
  ]

  const hit = rules.find(([matches]) => matches())
  return hit ? hit[1] : "building"
}
585
+
586
/**
 * Per-loop warning text, keyed by loop id. ensureLoopImage stores an entry
 * when toolchain baking fails; attachTerm (term.ts) reads it to show a
 * yellow banner in the PTY so the user learns their mise.toml is broken —
 * without losing the loop entirely (it falls back to the base image and
 * keeps going).
 */
const _loopWarnings = new Map<string, string>()

/** Look up the pending warning for a loop; `undefined` when there is none. */
export function getLoopWarning(loopId: string): string | undefined {
  const warning = _loopWarnings.get(loopId)
  return warning
}
596
+
597
/** Outcome of one (possibly shared) per-tag image resolution. */
type LoopImageBuildResult = {
  /** Image the loop should run: the per-loop tag, or SANDBOX_IMAGE after a failed build. */
  image: string
  /** Tail of the build output when the build failed; absent on success. */
  failureDetail?: string
}

/** In-flight image resolutions keyed by tag, so concurrent callers share one build. */
const _loopImageInFlight = new Map<string, Promise<LoopImageBuildResult>>()

/** Render the per-loop warning text for a failed toolchain build. */
function loopImageFailureMessage(miseTomlPath: string, detail: string): string {
  return `toolchain build failed — sandbox started without baked tools. mise install rejected ${miseTomlPath}: ${detail}`
}

/**
 * Resolve `tag`: reuse it if podman already has it, otherwise build it from
 * the given mise.toml content. Never throws on a failed build — the failure
 * is reported through `failureDetail` and the base image is returned instead.
 */
async function buildLoopImage(
  loopId: string,
  tag: string,
  miseTomlPath: string,
  content: string,
  onProgress?: (msg: string) => void,
): Promise<LoopImageBuildResult> {
  const present = await runPodman(["image", "exists", tag], { allowFail: true })
  if (present.code === 0) return { image: tag }

  console.log(`[podman] building loop image ${tag} for loop ${loopId.slice(0, 8)}`)
  onProgress?.("Installing tools from mise.toml…")
  const buildDir = await mkdtemp(join(tmpdir(), "loopat-img-"))
  try {
    // Write the exact content that was hashed into the tag. Re-copying the
    // file from disk could bake a newer mise.toml under a tag derived from
    // the older content if the file changes between the read and the build.
    await writeFile(join(buildDir, "mise.toml"), content)
    // Override `mise trust` interactively by marking the config path
    // trusted via env. `mise install -y` installs everything in
    // mise.toml; `mise reshim` ensures /opt/loopat-mise/shims/ has a
    // shim for every tool.
    const childContainerfile = [
      `FROM ${SANDBOX_IMAGE}`,
      `COPY mise.toml /opt/loopat-mise/config/config.toml`,
      `RUN MISE_TRUSTED_CONFIG_PATHS=/opt/loopat-mise/config/config.toml \\`,
      `    mise install -y \\`,
      ` && MISE_TRUSTED_CONFIG_PATHS=/opt/loopat-mise/config/config.toml \\`,
      `    mise reshim`,
    ].join("\n") + "\n"
    await writeFile(join(buildDir, "Containerfile"), childContainerfile)

    const r = await runPodman(
      ["build", "-t", tag, "-f", join(buildDir, "Containerfile"), buildDir],
      {
        allowFail: true,
        onLine: (line) => {
          const m = line.match(/^STEP\s+(\d+)\/(\d+):\s+(.+)/)
          if (m) {
            onProgress?.(`Installing tools: ${descStep(m[3])} (step ${m[1]}/${m[2]})`)
          }
        },
      },
    )
    if (r.code !== 0) {
      // Don't throw — fall back to base so the loop still starts. The
      // user can inspect via terminal, fix mise.toml, and restart.
      const detail = (r.stderr || r.stdout || "").trim().split("\n").slice(-3).join(" | ").slice(0, 400)
      console.error(`[podman] ${loopImageFailureMessage(miseTomlPath, detail)}`)
      return { image: SANDBOX_IMAGE, failureDetail: detail }
    }
    console.log(`[podman] loop image ${tag} ready`)
    return { image: tag }
  } finally {
    await rm(buildDir, { recursive: true, force: true }).catch(() => {})
  }
}

/**
 * Ensure a per-loop image exists for this loop's composed mise.toml,
 * returning its tag. Behavior:
 *   - no mise.toml (or empty)          → base SANDBOX_IMAGE, warning cleared
 *   - mise.toml present, build OK      → loopat-sandbox-<hash>:latest, clear any
 *                                        prior warning for this loop
 *   - mise.toml present, build FAILS   → log error, stash a per-loop warning,
 *                                        fall back to base SANDBOX_IMAGE so the
 *                                        loop still starts
 *
 * `<hash>` is the first 16 hex chars of sha256 over the base Containerfile
 * hash plus the mise.toml content, so two loops with the same toolchain spec
 * share an image and a base-image change forces a fresh child build.
 *
 * Concurrent calls for the same tag are coalesced via _loopImageInFlight.
 * Every caller — not only the one that started the build — applies the
 * outcome to its OWN loopId, so a loop that piggy-backs on another loop's
 * failed build still gets its warning recorded (and a stale one is cleared
 * on success). Progress callbacks are only delivered to the caller that
 * started the build.
 *
 * @param loopId - Loop whose composed mise.toml should be baked.
 * @param opts - Optional progress sink for user-facing status messages.
 * @returns The image reference the loop's container should be created from.
 */
export async function ensureLoopImage(loopId: string, opts?: { onProgress?: (msg: string) => void }): Promise<string> {
  await ensureSandboxImage(opts)

  const miseTomlPath = join(loopClaudeDir(loopId), "mise.toml")
  if (!existsSync(miseTomlPath)) {
    _loopWarnings.delete(loopId)
    return SANDBOX_IMAGE
  }
  const content = await readFile(miseTomlPath, "utf8")
  if (!content.trim()) {
    _loopWarnings.delete(loopId)
    return SANDBOX_IMAGE
  }

  // Hash both mise.toml AND the base Containerfile so that base-image
  // changes (apt installs added, configs changed) cascade into a fresh
  // child build. Without the base part, child images stay frozen against
  // an old base layer set even after `loopat-sandbox:latest` is rebuilt.
  const baseHash = await baseContainerfileHash()
  const hash = createHash("sha256").update(`base:${baseHash}\n`).update(content).digest("hex").slice(0, 16)
  const tag = `loopat-sandbox-${hash}:latest`

  let pending = _loopImageInFlight.get(tag)
  if (!pending) {
    pending = buildLoopImage(loopId, tag, miseTomlPath, content, opts?.onProgress).finally(() => {
      _loopImageInFlight.delete(tag)
    })
    _loopImageInFlight.set(tag, pending)
  }

  const { image, failureDetail } = await pending
  if (failureDetail === undefined) {
    _loopWarnings.delete(loopId)
  } else {
    _loopWarnings.set(loopId, loopImageFailureMessage(miseTomlPath, failureDetail))
  }
  return image
}
702
+
703
/** Condensed view of a loop container, as parsed from `podman inspect`. */
type ContainerInspectRow = {
  /** False when `podman inspect` could not find / inspect the container. */
  exists: boolean
  /** True when the inspected `.State.Running` value was "true". */
  running: boolean
  /** Value of the config-hash label; omitted when the label is empty or unset. */
  configHash?: string
  /** The inspected `.Image` value; omitted when empty or unset. */
  imageId?: string
}
709
+
710
/**
 * Inspect a loop's container and summarise its running state, config-hash
 * label and image.
 *
 * @param loopId - Loop whose container should be inspected.
 * @returns `{ exists: false, running: false }` when `podman inspect` exits
 *   non-zero; otherwise the parsed fields, with empty / "<no value>"
 *   placeholders mapped to `undefined`.
 */
async function inspectContainer(loopId: string): Promise<ContainerInspectRow> {
  const target = containerName(loopId)
  // One pipe-separated line: running flag | config-hash label | image.
  const format = `{{.State.Running}}|{{index .Config.Labels "${LABEL_CONFIG_HASH}"}}|{{.Image}}`
  const result = await runPodman(["inspect", "--format", format, target], { allowFail: true })
  if (result.code !== 0) {
    return { exists: false, running: false }
  }

  // Go templates print "<no value>" for missing keys; treat that like empty.
  const orUndefined = (value: string | undefined): string | undefined =>
    value === "<no value>" || value === "" ? undefined : value

  const fields = result.stdout.trim().split("|")
  return {
    exists: true,
    running: fields[0] === "true",
    configHash: orUndefined(fields[1]),
    imageId: orUndefined(fields[2]),
  }
}
725
+
726
/** Whether `podman inspect` can find a container for this loop. */
export async function containerExists(loopId: string): Promise<boolean> {
  const { exists } = await inspectContainer(loopId)
  return exists
}
729
+
730
/** Whether this loop's container exists and reports itself as running. */
export async function containerRunning(loopId: string): Promise<boolean> {
  const { running } = await inspectContainer(loopId)
  return running
}
733
+
734
/**
 * Read the container's IP address from its network settings.
 *
 * @param loopId - Loop whose container should be queried.
 * @returns The address printed by `podman inspect`, or `null` when the
 *   inspect fails or prints nothing / "<no value>".
 */
export async function getContainerIP(loopId: string): Promise<string | null> {
  const target = containerName(loopId)
  const result = await runPodman(
    ["inspect", "--format", "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}", target],
    { allowFail: true },
  )
  if (result.code !== 0) return null

  const address = result.stdout.trim()
  return address === "" || address === "<no value>" ? null : address
}
746
+
747
/**
 * Resolve the host port behind an ephemeral `-p :<inner>` mapping.
 *
 * `podman port <ct> <inner>/<proto>` prints lines like `0.0.0.0:44513`; the
 * first non-blank line is the one that gets parsed. The lookup is done on
 * demand rather than cached — the mapping changes only when the container
 * is recreated.
 *
 * @param loopId - Loop whose container should be queried.
 * @param internalPort - Port number inside the container.
 * @param protocol - Transport of the mapping; defaults to "tcp".
 * @returns The positive host port, or `null` when podman exits non-zero or
 *   the first non-blank line does not end in `:<digits>`.
 */
export async function getEphemeralHostPort(
  loopId: string,
  internalPort: number,
  protocol: "tcp" | "udp" = "tcp",
): Promise<number | null> {
  const target = containerName(loopId)
  const result = await runPodman(["port", target, `${internalPort}/${protocol}`], { allowFail: true })
  if (result.code !== 0) return null

  for (const rawLine of result.stdout.split("\n")) {
    const binding = rawLine.trim()
    if (binding.length === 0) continue
    // Only the first non-blank line is considered, whatever it contains.
    const match = binding.match(/:(\d+)$/)
    if (!match) return null
    const hostPort = Number(match[1])
    if (Number.isFinite(hostPort) && hostPort > 0) return hostPort
    return null
  }
  return null
}
773
+
774
/** Name of the podman network that loopat containers are attached to. */
const LOOPAT_NETWORK = "loopat"
/** Name of the workspace-scoped serve container. */
const SERVE_CONTAINER = `loopat-${WORKSPACE}-serve`

/** Set once the network has been confirmed (or created) in this process. */
let _networkReady = false
/** In-flight ensureServeContainer() run, or null when none is pending. */
let _serveReady: Promise<void> | null = null

/**
 * Ensure the shared bridge network exists so containers can reach each other.
 * The result is memoised in `_networkReady`, so podman is only consulted
 * until the first successful check.
 *
 * @throws Error when `podman network create` reports a non-zero exit code.
 */
export async function ensureLoopatNetwork(): Promise<void> {
  if (_networkReady) return

  const probe = await runPodman(["network", "exists", LOOPAT_NETWORK], { allowFail: true })
  const missing = probe.code !== 0
  if (missing) {
    console.log(`[podman] creating network ${LOOPAT_NETWORK}`)
    const created = await runPodman(["network", "create", LOOPAT_NETWORK])
    if (created.code !== 0) {
      throw new Error(`Failed to create podman network ${LOOPAT_NETWORK}: ${created.stderr}`)
    }
  }
  _networkReady = true
}
793
+
794
/**
 * Ensure the workspace serve container matches the configured state.
 *
 *   - `serveEnabled` false          → stop + remove the container if present
 *   - enabled, already running      → no-op
 *   - enabled, exists but stopped   → `podman start`
 *   - enabled, missing              → `podman create` + `podman start`
 *
 * Concurrent callers share one in-flight run via `_serveReady`; the slot is
 * cleared when the run finishes so the next call re-checks podman's state.
 *
 * @throws Error when the serve binary is missing, or when `podman create` /
 *   `podman start` report a non-zero exit code.
 */
export async function ensureServeContainer(): Promise<void> {
  if (_serveReady) return _serveReady
  _serveReady = (async () => {
    const cfg = await loadConfig()
    const enabled = cfg.serveEnabled ?? true // default on for backward compat

    // Check current container state
    const cur = await runPodman(
      ["inspect", "--format", "{{.State.Running}}", SERVE_CONTAINER],
      { allowFail: true },
    )

    if (!enabled) {
      // Disabled — stop and remove if exists
      if (cur.code === 0) {
        console.log(`[podman] serve disabled, removing serve container`)
        await runPodman(["stop", "--time", "5", SERVE_CONTAINER], { allowFail: true })
        await runPodman(["rm", "--force", SERVE_CONTAINER], { allowFail: true })
      }
      _serveReady = null
      return
    }

    await ensureLoopatNetwork()
    await ensureSandboxImage()

    if (cur.code === 0 && cur.stdout.trim() === "true") {
      _serveReady = null
      return
    }

    if (cur.code === 0) {
      // Exists but not running — start it
      console.log(`[podman] starting serve container`)
      const restarted = await runPodman(["start", SERVE_CONTAINER])
      // Surface a failed start instead of silently reporting success,
      // matching how the port-proxy container is handled.
      if (restarted.code !== 0) {
        _serveReady = null
        throw new Error(`serve container start failed: ${restarted.stderr}`)
      }
      _serveReady = null
      return
    }

    // Create the serve container
    console.log(`[podman] creating serve container on network ${LOOPAT_NETWORK}`)
    const serveBinary = join(LOOPAT_INSTALL_DIR, "server", "src", "serve-rs", "target", "release", "loopat-serve")
    if (!existsSync(serveBinary)) {
      _serveReady = null
      throw new Error(`serve binary not found at ${serveBinary}. Run: cd server/src/serve-rs && cargo build --release`)
    }

    const createArgs = [
      "--name", SERVE_CONTAINER,
      "--network", LOOPAT_NETWORK,
      "--hostname", "loopat-serve",
      "--volume", `${loopsDir()}:/loopat/loops:ro`,
      "--volume", `${serveBinary}:/usr/local/bin/loopat-serve:ro`,
      "-p", `${SERVE_HOST}:${SERVE_PORT}:7788`,
      "-e", `LOOPAT_WORKSPACE=${WORKSPACE}`,
      "-e", `LOOPAT_LOOPS_DIR=/loopat/loops`,
      "--init",
      SANDBOX_IMAGE,
      "/usr/local/bin/loopat-serve",
    ]
    const r = await runPodman(["create", ...createArgs])
    if (r.code !== 0) {
      _serveReady = null
      throw new Error(`serve container create failed: ${r.stderr}`)
    }
    const started = await runPodman(["start", SERVE_CONTAINER])
    // Only announce "ready" once podman confirms the container started.
    if (started.code !== 0) {
      _serveReady = null
      throw new Error(`serve container start failed: ${started.stderr}`)
    }
    console.log(`[podman] serve container ready on port ${SERVE_PORT}`)
  })()
  try {
    await _serveReady
  } finally {
    _serveReady = null
  }
}
869
+
870
/** Name of the workspace-scoped port-proxy container. */
const PORT_PROXY_CONTAINER = `loopat-${WORKSPACE}-port-proxy`

/** In-flight ensurePortProxyContainer() run, or null when none is pending. */
let _portProxyReady: Promise<void> | null = null
873
+
874
/**
 * Collect the listening TCP ports inside `[lo, hi]` (both inclusive).
 *
 * `ss` is used instead of `lsof`: unprivileged `lsof` only sees sockets
 * owned by the current user, so cross-user services (ollama, system
 * dashboards, other devs on a shared box) get missed and the port-proxy
 * container fails to start with `bind: address already in use`. `ss`
 * shows every listening socket on the host, which is what matters when
 * picking host ports to publish.
 *
 * Best-effort: if `ss` cannot be run or its output cannot be processed,
 * whatever was collected so far (possibly nothing) is returned.
 *
 * @param lo - Lowest port of interest.
 * @param hi - Highest port of interest.
 * @returns The set of in-range ports that currently have a listener.
 */
function findOccupiedPorts(lo: number, hi: number): Set<number> {
  const occupied = new Set<number>()
  const whitespace = /\s+/
  try {
    const { execFileSync } = require("node:child_process")
    const listing = execFileSync("ss", ["-tlnH"], { encoding: "utf8", timeout: 3000, stdio: ["ignore", "pipe", "ignore"] }) as string
    listing.split("\n").forEach((row) => {
      const columns = row.trim().split(whitespace)
      if (columns.length < 4) return
      // Fourth column is the local address, e.g. "0.0.0.0:8080",
      // "[::]:8080" or "127.0.0.1:8080"; the port follows the last colon.
      const local = columns[3]
      const separator = local.lastIndexOf(":")
      if (separator === -1) return
      const port = Number(local.slice(separator + 1))
      if (port >= lo && port <= hi) occupied.add(port)
    })
  } catch {
    // Deliberately ignored: an unavailable `ss` just means no exclusions.
  }
  return occupied
}
901
+
902
/**
 * Assemble the `podman create` arguments for the port-proxy container.
 *
 * @param binary - Host path of the port-proxy executable, mounted read-only.
 * @param portRange - "<lo>-<hi>" range of host ports to publish 1:1.
 * @param occupiedPorts - Ports inside the range to leave unpublished.
 * @returns Arguments to pass after `podman create`.
 */
function buildPortProxyCreateArgs(binary: string, portRange: string, occupiedPorts: Set<number>): string[] {
  const bounds = portRange.split("-").map(Number)
  const first = bounds[0]
  const last = bounds[1]

  // One `-p` pair per free port; occupied ports are skipped entirely.
  const publish: string[] = []
  let port = first
  while (port <= last) {
    if (!occupiedPorts.has(port)) publish.push("-p", `0.0.0.0:${port}:${port}`)
    port++
  }

  const args: string[] = []
  args.push("--name", PORT_PROXY_CONTAINER)
  args.push("--network", LOOPAT_NETWORK)
  args.push("--hostname", "loopat-port-proxy")
  args.push("--volume", `${loopsDir()}:/loopat/loops:ro`)
  args.push("--volume", `${binary}:/usr/local/bin/loopat-port-proxy:ro`)
  args.push(...publish)
  args.push("-e", `LOOPAT_WORKSPACE=${WORKSPACE}`)
  args.push("-e", `LOOPAT_LOOPS_DIR=/loopat/loops`)
  args.push("--init")
  args.push(SANDBOX_IMAGE)
  args.push("/usr/local/bin/loopat-port-proxy")
  return args
}
925
+
926
/**
 * Ensure the port-proxy container (direct TCP/UDP forwarding) matches config.
 *
 *   - `serveDynamicEnabled` false → stop + remove the container if present
 *   - enabled, already running    → no-op
 *   - enabled, exists but stopped → try `podman start`; on a port-conflict
 *                                   style failure remove it and recreate
 *   - enabled, missing            → create with every free port in the
 *                                   configured range published, then start
 *
 * Concurrent callers share one in-flight run via `_portProxyReady`.
 *
 * @throws Error when the binary is missing, the port range is invalid, or
 *   podman fails to create / start the container.
 */
export async function ensurePortProxyContainer(): Promise<void> {
  if (_portProxyReady) return _portProxyReady

  const run = async (): Promise<void> => {
    const config = await loadConfig()
    const dynamicEnabled = config.serveDynamicEnabled ?? false
    const range = config.serveDynamicPortRange || (process.env.LOOPAT_EXTERNAL_PORT_RANGE ?? "10000-20000")

    const state = await runPodman(
      ["inspect", "--format", "{{.State.Running}}", PORT_PROXY_CONTAINER],
      { allowFail: true },
    )
    const present = state.code === 0

    if (!dynamicEnabled) {
      if (present) {
        console.log(`[podman] dynamic port disabled, removing port-proxy container`)
        await runPodman(["stop", "--time", "5", PORT_PROXY_CONTAINER], { allowFail: true })
        await runPodman(["rm", "--force", PORT_PROXY_CONTAINER], { allowFail: true })
      }
      _portProxyReady = null
      return
    }

    await ensureLoopatNetwork()
    await ensureSandboxImage()

    if (present) {
      if (state.stdout.trim() === "true") {
        _portProxyReady = null
        return
      }
      // Stopped container: a plain start is cheapest. Only a failure that
      // looks like a port conflict justifies discarding and recreating it.
      const restart = await runPodman(["start", PORT_PROXY_CONTAINER], { allowFail: true })
      if (restart.code === 0) {
        _portProxyReady = null
        return
      }
      const portConflict = /(bind|address already in use|rootlessport)/i.test(restart.stderr + restart.stdout)
      if (!portConflict) {
        _portProxyReady = null
        throw new Error(`port-proxy start failed: ${restart.stderr || restart.stdout}`)
      }
      console.log(`[podman] existing port-proxy container has port conflicts — recreating`)
      await runPodman(["rm", "--force", PORT_PROXY_CONTAINER], { allowFail: true })
    }

    const proxyBinary = join(LOOPAT_INSTALL_DIR, "server", "src", "port-proxy-rs", "target", "release", "loopat-port-proxy")
    if (!existsSync(proxyBinary)) {
      _portProxyReady = null
      throw new Error(`port-proxy binary not found at ${proxyBinary}. Run: cd server/src/port-proxy-rs && cargo build --release`)
    }

    // Ask `ss` which ports in the range already have listeners and leave
    // those out of the published set.
    const bounds = range.split("-").map(Number)
    const lo = bounds[0]
    const hi = bounds[1]
    if (!lo || !hi || lo >= hi) {
      _portProxyReady = null
      throw new Error(`invalid port range: ${range}`)
    }
    const busy = findOccupiedPorts(lo, hi)
    if (busy.size > 0) console.log(`[podman] ${busy.size} port(s) in ${range} already in use — skipping`)

    const created = await runPodman(["create", ...buildPortProxyCreateArgs(proxyBinary, range, busy)])
    if (created.code !== 0) {
      _portProxyReady = null
      throw new Error(`port-proxy container create failed: ${created.stderr}`)
    }
    const started = await runPodman(["start", PORT_PROXY_CONTAINER])
    if (started.code !== 0) {
      _portProxyReady = null
      throw new Error(`port-proxy start failed: ${started.stderr}`)
    }
    const publishedCount = (hi - lo + 1) - busy.size
    console.log(`[podman] port-proxy container ready (${publishedCount} ports in ${range})`)
  }

  _portProxyReady = run()
  try {
    await _portProxyReady
  } finally {
    _portProxyReady = null
  }
}
1011
+
1012
/** Host address the serve container's port is published on (LOOPAT_SERVE_HOST, default "127.0.0.1"). */
const SERVE_HOST = process.env.LOOPAT_SERVE_HOST === undefined
  ? "127.0.0.1"
  : process.env.LOOPAT_SERVE_HOST
/** Host port mapped to the serve container's 7788 (LOOPAT_SERVE_PORT, default 7788). */
const SERVE_PORT = Number(
  process.env.LOOPAT_SERVE_PORT === undefined ? 7788 : process.env.LOOPAT_SERVE_PORT,
)
1014
+
1015
/**
 * Idempotently bring a loop's container to "running with current config".
 *
 *   state of container            action
 *   ---------------------------   ------------------------------
 *   missing                       create + start
 *   stopped, no drift             start
 *   stopped, drift                rm + create + start
 *   running, no drift             nothing
 *   running, drift                stop + rm + create + start
 *
 * "Drift" means the config-hash label differs from the one in the freshly
 * built create args, OR the container's image id differs from the id the
 * resolved image currently has.
 *
 * @param opts - Container spec; `opts.image` overrides the per-loop image.
 * @param progress - Optional sink for user-facing status messages.
 */
export async function ensureContainer(opts: ContainerOptions, progress?: { onProgress?: (msg: string) => void }): Promise<void> {
  await ensureLoopatNetwork()
  // Image first: with a composed mise.toml this builds (or reuses) the
  // per-loop child image; without one it resolves to the base image.
  const image = opts.image ?? (await ensureLoopImage(opts.loopId, progress))
  const effectiveOpts: ContainerOptions = { ...opts, image }

  // Bind destinations must already exist on the host. If podman creates
  // them at container start they end up owned by root-in-userns (subuid
  // 100000 outside), and the host user can no longer delete them.
  const homeUpper = loopHomeUpper(opts.loopId)
  const bindParents = [
    homeUpper,
    join(homeUpper, ".claude", "plugins"),
    join(homeUpper, ".local", "share"),
    loopDir(opts.loopId),
  ]
  for (const dir of bindParents) {
    await mkdir(dir, { recursive: true })
  }

  const createArgs = await buildPodmanCreateArgs(effectiveOpts)
  // The desired config hash travels as the value of a `--label` argument.
  const hashLabel = createArgs.find((arg, i) =>
    createArgs[i - 1] === "--label" && arg.startsWith(`${LABEL_CONFIG_HASH}=`),
  )
  const desiredHash = hashLabel !== undefined ? hashLabel.split("=")[1] : ""

  // Compare image ids as well, so a rebuilt image under the same tag
  // forces recreation even when the config hash is unchanged.
  const imageInspect = await runPodman(["image", "inspect", "--format", "{{.Id}}", image])
  const wantedImageId = imageInspect.stdout.trim()

  const current = await inspectContainer(opts.loopId)
  const hashDrift = current.configHash !== desiredHash
  const imageDrift = current.imageId !== wantedImageId
  const upToDate = !hashDrift && !imageDrift
  if (current.running && upToDate) return

  const shortId = opts.loopId.slice(0, 8)
  const name = containerName(opts.loopId)

  if (current.exists && upToDate) {
    // Stopped but still matching the spec; a start is enough.
    console.log(`[podman:${shortId}] restarting stopped container`)
    await runPodman(["start", name])
    return
  }

  if (current.exists) {
    // Spec or image drift — tear down before recreating. This kills any
    // process exec'd into the old container (PTY shells, an active claude
    // CLI), so log loudly to make the cause of a dropped terminal obvious.
    const reason = hashDrift ? "config hash drift" : "image drift (rebuilt)"
    console.warn(`[podman:${shortId}] ${reason} — recreating container; any in-flight exec'd processes will be killed`)
    if (current.running) await runPodman(["stop", "--time", "5", name])
    await runPodman(["rm", "--force", name])
  }

  console.log(`[podman:${shortId}] creating + starting container (hash=${desiredHash})`)
  progress?.onProgress?.("Creating sandbox container…")
  await runPodman(["create", ...createArgs])
  progress?.onProgress?.("Starting sandbox container…")
  await runPodman(["start", name])
}
1079
+
1080
/**
 * Stop a loop's container, giving it 5 seconds before podman kills it.
 * A non-zero exit is only logged, and not at all when stderr says the
 * container does not exist.
 */
export async function stopContainer(loopId: string): Promise<void> {
  const result = await runPodman(["stop", "--time", "5", containerName(loopId)], { allowFail: true })
  if (result.code === 0) return
  if (result.stderr.includes("no such container")) return
  console.warn(`[podman] stop ${loopId} non-zero exit (${result.code}): ${result.stderr.trim()}`)
}
1086
+
1087
/** Force-remove a loop's container; a failing `podman rm` is tolerated. */
export async function removeContainer(loopId: string): Promise<void> {
  const target = containerName(loopId)
  await runPodman(["rm", "--force", target], { allowFail: true })
}
1090
+
1091
/**
 * Stop every container carrying this workspace's label. Called on server
 * shutdown so the host isn't left with hundreds of idle sandbox containers.
 * Stops are issued in parallel; if the listing itself fails, nothing is
 * stopped.
 */
export async function stopAllWorkspaceContainers(): Promise<void> {
  const listing = await runPodman(
    ["ps", "--all", "--filter", `label=${LABEL_WORKSPACE}=${WORKSPACE}`, "--format", "{{.Names}}"],
    { allowFail: true },
  )
  if (listing.code !== 0) return

  const stops: Promise<unknown>[] = []
  for (const rawName of listing.stdout.split("\n")) {
    const name = rawName.trim()
    if (!name) continue
    stops.push(runPodman(["stop", "--time", "5", name], { allowFail: true }))
  }
  await Promise.all(stops)
}
1104
+
1105
+ // ── idle stop scheduler ───────────────────────────────────────────────────
1106
+ // Each loop can have multiple activity "sources" — e.g. "sdk" (active SDK
1107
+ // session) and "pty" (active terminal subscribers). The container stays up
1108
+ // as long as ANY source is active. When the last source goes inactive, we
1109
+ // arm an idle timer; if no source re-activates within the window, we
1110
+ // `podman stop` the container so the namespace + overlay get released.
1111
+ // User-launched background daemons (e.g. nohup server.py &) that linger
1112
+ // past all SDK/PTY sources WILL be killed when idle stop fires — this is
1113
+ // the explicit v1 trade-off (consistent with "idle = sandbox dies").
1114
+
1115
/**
 * Idle window, in milliseconds, before an inactive loop's container is
 * stopped. Taken from LOOPAT_CONTAINER_IDLE_MS, defaulting to 30 minutes.
 *
 * The env var is read on every call so tests can override it per-spec.
 *
 * Unset, non-numeric, zero and negative values fall back to the default.
 * Values above 2^31-1 are clamped to that maximum: timers coerce a larger
 * (or infinite) delay to 1 ms, which would stop the container almost
 * immediately instead of "practically never".
 *
 * @returns A finite delay in the range (0, 2147483647].
 */
function containerIdleMs(): number {
  const defaultIdleMs = 30 * 60 * 1000
  // Largest delay setTimeout honours (signed 32-bit milliseconds).
  const maxTimerDelayMs = 2147483647

  const configured = Number(process.env.LOOPAT_CONTAINER_IDLE_MS)
  // `!(x > 0)` also rejects NaN, which `x <= 0` would let through.
  if (!(configured > 0)) return defaultIdleMs
  return Math.min(configured, maxTimerDelayMs)
}
1120
+
1121
/** Book-keeping behind the idle-stop scheduler. */
type ActivityRegistry = {
  /** Active source ids per loop. A missing (or empty) entry means nothing is active. */
  active: Map<string, Set<string>>
  /** Pending idle-stop timer per loop; cancelled when a source becomes active again. */
  idleTimers: Map<string, ReturnType<typeof setTimeout>>
}

/** Process-wide activity state; starts out with no active sources and no timers. */
const registry: ActivityRegistry = {
  active: new Map<string, Set<string>>(),
  idleTimers: new Map<string, ReturnType<typeof setTimeout>>(),
}
1132
+
1133
/**
 * Record that `source` is keeping the loop's container busy, and cancel any
 * idle-stop timer pending for that loop.
 *
 * @param loopId - Loop the activity belongs to.
 * @param source - Identifier of the activity source being registered.
 */
export function markActive(loopId: string, source: string): void {
  const sources = registry.active.get(loopId)
  if (sources) {
    sources.add(source)
  } else {
    registry.active.set(loopId, new Set([source]))
  }

  const pendingStop = registry.idleTimers.get(loopId)
  if (!pendingStop) return
  clearTimeout(pendingStop)
  registry.idleTimers.delete(loopId)
}
1146
+
1147
/**
 * Record that `source` no longer needs the loop's container. When no source
 * remains active for the loop, an idle-stop timer is (re)armed.
 *
 * @param loopId - Loop the activity belongs to.
 * @param source - Identifier of the activity source being released.
 */
export function markInactive(loopId: string, source: string): void {
  const sources = registry.active.get(loopId)
  if (sources !== undefined) {
    sources.delete(source)
    // Another source still holds the container up: nothing to schedule.
    if (sources.size > 0) return
    registry.active.delete(loopId)
  }
  scheduleIdleStop(loopId)
}
1157
+
1158
/**
 * Arm (or re-arm) the idle-stop timer for a loop. When it fires and the
 * loop still has no active source, the container is stopped; a failure to
 * stop is logged, never thrown.
 *
 * @param loopId - Loop whose container should be stopped after the idle window.
 */
function scheduleIdleStop(loopId: string): void {
  const previous = registry.idleTimers.get(loopId)
  if (previous) clearTimeout(previous)

  const onIdle = async (): Promise<void> => {
    registry.idleTimers.delete(loopId)
    // Activity may have resumed between arming and firing.
    const activeCount = registry.active.get(loopId)?.size ?? 0
    if (activeCount > 0) return
    try {
      await stopContainer(loopId)
      console.log(`[podman] idle-stopped container for loop ${loopId.slice(0, 8)}`)
    } catch (err: unknown) {
      const detail = (err as { message?: unknown } | null | undefined)?.message ?? err
      console.warn(`[podman] idle stop failed for loop ${loopId.slice(0, 8)}: ${detail}`)
    }
  }

  const timer = setTimeout(onIdle, containerIdleMs())
  registry.idleTimers.set(loopId, timer)
}
1174
+
1175
/** Test-only helper: cancel every pending idle timer and forget all activity. */
export function _resetActivityRegistryForTests(): void {
  registry.idleTimers.forEach((timer) => {
    clearTimeout(timer)
  })
  registry.idleTimers.clear()
  registry.active.clear()
}
1181
+
1182
/** Test-only: snapshot of the source ids currently active for a loop (empty when none). */
export function _getActiveSourcesForTests(loopId: string): string[] {
  const sources = registry.active.get(loopId)
  return sources ? Array.from(sources) : []
}
1186
+
1187
+ // ── exec into the container ───────────────────────────────────────────────
1188
+
1189
/** Inputs for building a `podman exec` invocation against a loop's container. */
export type ExecOptions = {
  /** Loop whose container the command runs in. */
  loopId: string
  /** Executable to run inside the container. */
  command: string
  /** Arguments passed to `command`, in order. */
  args: string[]
  /** Extra environment variables, each emitted as `--env KEY=VALUE`. */
  env?: Record<string, string>
  /** Adds `--tty` when truthy. */
  tty?: boolean
  /** Adds `--interactive` when truthy. */
  interactive?: boolean
  /** Adds `--workdir <path>` when set. */
  workdir?: string
}
1198
+
1199
/**
 * Compose the argv for `podman exec`, starting with the "exec" subcommand.
 * Pure: performs no I/O — the caller spawns "podman" with the result.
 *
 * Order of the result: "exec", optional `--interactive`, `--tty`,
 * `--workdir`, one `--env K=V` per entry of `opts.env`, then the container
 * name, the command and its arguments.
 *
 * Note: when both `interactive` and `tty` are set, callers typically use
 * bun-pty to provide a real PTY master; podman exec passes through.
 *
 * @param opts - What to run and how (see ExecOptions).
 * @returns The argument vector to pass to the `podman` binary.
 */
export function buildPodmanExecArgs(opts: ExecOptions): string[] {
  const flags: string[] = []
  if (opts.interactive) flags.push("--interactive")
  if (opts.tty) flags.push("--tty")
  if (opts.workdir) flags.push("--workdir", opts.workdir)

  const envFlags = Object.entries(opts.env ?? {}).flatMap(
    ([key, value]) => ["--env", `${key}=${value}`],
  )

  return ["exec", ...flags, ...envFlags, containerName(opts.loopId), opts.command, ...opts.args]
}