@madarco/agentbox 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +125 -0
  2. package/README.md +11 -8
  3. package/dist/{_cloud-attach-HJC672UR.js → _cloud-attach-R6TRWG5L.js} +4 -4
  4. package/dist/{chunk-QYRK5H6Q.js → chunk-43Q5GWP6.js} +108 -56
  5. package/dist/chunk-43Q5GWP6.js.map +1 -0
  6. package/dist/{chunk-ECLLV5JH.js → chunk-72CJTXN6.js} +156 -5
  7. package/dist/chunk-72CJTXN6.js.map +1 -0
  8. package/dist/{chunk-R5XIDQFR.js → chunk-BKU34KYY.js} +170 -6
  9. package/dist/chunk-BKU34KYY.js.map +1 -0
  10. package/dist/{chunk-4NQXNQ53.js → chunk-E7CHS7ZR.js} +168 -58
  11. package/dist/chunk-E7CHS7ZR.js.map +1 -0
  12. package/dist/chunk-MCOU6CZS.js +346 -0
  13. package/dist/chunk-MCOU6CZS.js.map +1 -0
  14. package/dist/{chunk-B4QG2MCW.js → chunk-MLMFNN4T.js} +762 -483
  15. package/dist/chunk-MLMFNN4T.js.map +1 -0
  16. package/dist/{chunk-2LF5YILI.js → chunk-RSKG7AFU.js} +80 -6
  17. package/dist/chunk-RSKG7AFU.js.map +1 -0
  18. package/dist/{chunk-SNTHHWKY.js → chunk-XKH7NTT7.js} +80 -22
  19. package/dist/chunk-XKH7NTT7.js.map +1 -0
  20. package/dist/{dist-7KVUIKJX.js → dist-AGTIA7AD.js} +37 -226
  21. package/dist/dist-AGTIA7AD.js.map +1 -0
  22. package/dist/{dist-OPIBZ7XM.js → dist-FIFEFKJ7.js} +14 -69
  23. package/dist/dist-FIFEFKJ7.js.map +1 -0
  24. package/dist/dist-JZ3XO6EB.js +662 -0
  25. package/dist/dist-JZ3XO6EB.js.map +1 -0
  26. package/dist/{dist-OG6NW6SM.js → dist-OGJGZETZ.js} +5 -3
  27. package/dist/{dist-JAN5VABY.js → dist-S4XR4ACV.js} +25 -177
  28. package/dist/dist-S4XR4ACV.js.map +1 -0
  29. package/dist/index.js +2229 -1314
  30. package/dist/index.js.map +1 -1
  31. package/dist/{prepared-state-MQHD3M5F-KE4DT3GX.js → prepared-state-MQHD3M5F-Q27AZU53.js} +2 -2
  32. package/package.json +6 -4
  33. package/runtime/docker/Dockerfile.box +21 -26
  34. package/runtime/docker/apps/cli/share/agentbox-setup/SKILL.md +67 -1
  35. package/runtime/docker/packages/ctl/dist/bin.cjs +361 -43
  36. package/runtime/docker/packages/sandbox-docker/scripts/agentbox-vnc-start +17 -6
  37. package/runtime/docker/packages/sandbox-docker/scripts/chromium-resolver +57 -0
  38. package/runtime/docker/packages/sandbox-docker/scripts/claude-managed-settings.json +2 -1
  39. package/runtime/e2b/agentbox-checkpoint-cleanup +52 -0
  40. package/runtime/e2b/agentbox-codex-hooks.json +68 -0
  41. package/runtime/e2b/agentbox-open +28 -0
  42. package/runtime/e2b/agentbox-setup-skill.md +263 -0
  43. package/runtime/e2b/agentbox-vnc-start +102 -0
  44. package/runtime/e2b/attach-helper.cjs +167 -0
  45. package/runtime/e2b/claude-managed-settings.json +116 -0
  46. package/runtime/e2b/ctl.cjs +24158 -0
  47. package/runtime/e2b/custom-system-CLAUDE.md +46 -0
  48. package/runtime/e2b/gh-shim +344 -0
  49. package/runtime/e2b/git-shim +131 -0
  50. package/runtime/e2b/scripts/build-template.sh +295 -0
  51. package/runtime/hetzner/agentbox-setup-skill.md +67 -1
  52. package/runtime/hetzner/agentbox-vnc-start +17 -6
  53. package/runtime/hetzner/claude-managed-settings.json +2 -1
  54. package/runtime/hetzner/ctl.cjs +361 -43
  55. package/runtime/relay/bin.cjs +380 -233
  56. package/runtime/vercel/agentbox-setup-skill.md +67 -1
  57. package/runtime/vercel/agentbox-vnc-start +17 -6
  58. package/runtime/vercel/claude-managed-settings.json +2 -1
  59. package/runtime/vercel/ctl.cjs +361 -43
  60. package/share/agentbox-setup/SKILL.md +67 -1
  61. package/share/host-skills/agentbox-info/SKILL.md +47 -35
  62. package/dist/chunk-2LF5YILI.js.map +0 -1
  63. package/dist/chunk-4NQXNQ53.js.map +0 -1
  64. package/dist/chunk-B4QG2MCW.js.map +0 -1
  65. package/dist/chunk-ECLLV5JH.js.map +0 -1
  66. package/dist/chunk-QYRK5H6Q.js.map +0 -1
  67. package/dist/chunk-R5XIDQFR.js.map +0 -1
  68. package/dist/chunk-SNTHHWKY.js.map +0 -1
  69. package/dist/dist-7KVUIKJX.js.map +0 -1
  70. package/dist/dist-JAN5VABY.js.map +0 -1
  71. package/dist/dist-OPIBZ7XM.js.map +0 -1
  72. /package/dist/{_cloud-attach-HJC672UR.js.map → _cloud-attach-R6TRWG5L.js.map} +0 -0
  73. /package/dist/{dist-OG6NW6SM.js.map → dist-OGJGZETZ.js.map} +0 -0
  74. /package/dist/{prepared-state-MQHD3M5F-KE4DT3GX.js.map → prepared-state-MQHD3M5F-Q27AZU53.js.map} +0 -0
@@ -23,10 +23,22 @@ mkdir -p "$HOME/.vnc"
23
23
  # VncAuth truncates >8 chars at compare time, which is fine — the host writes
24
24
  # an 8-char password. Write to a temp file + rename so a failure (e.g.,
25
25
  # vncpasswd missing) doesn't leave an empty file that Xvnc would then reject.
26
- TMP_PASSWD="$(mktemp "$HOME/.vnc/passwd.XXXXXX")"
27
- printf '%s\n' "$PASS" | vncpasswd -f > "$TMP_PASSWD"
28
- chmod 600 "$TMP_PASSWD"
29
- mv "$TMP_PASSWD" "$HOME/.vnc/passwd"
26
+ #
27
+ # Debian 12 (E2B base) doesn't package vncpasswd at all — tigervnc-tools is
28
+ # Ubuntu-only. When vncpasswd is missing, fall back to `-SecurityTypes None`
29
+ # and rely on the cloud provider's signed preview URL as the access boundary
30
+ # (same effective model: holding the URL = holding the credential). Other
31
+ # providers (docker, hetzner, daytona, vercel) keep VncAuth.
32
+ VNC_SECURITY_ARGS=(-SecurityTypes VncAuth -PasswordFile "$HOME/.vnc/passwd")
33
+ if command -v vncpasswd >/dev/null 2>&1; then
34
+ TMP_PASSWD="$(mktemp "$HOME/.vnc/passwd.XXXXXX")"
35
+ printf '%s\n' "$PASS" | vncpasswd -f > "$TMP_PASSWD"
36
+ chmod 600 "$TMP_PASSWD"
37
+ mv "$TMP_PASSWD" "$HOME/.vnc/passwd"
38
+ else
39
+ echo "agentbox-vnc-start: vncpasswd not installed; starting Xvnc with -SecurityTypes None (preview URL is the access boundary)" >&2
40
+ VNC_SECURITY_ARGS=(-SecurityTypes None)
41
+ fi
30
42
 
31
43
  mkdir -p /var/log/agentbox 2>/dev/null || true
32
44
 
@@ -37,8 +49,7 @@ mkdir -p /var/log/agentbox 2>/dev/null || true
37
49
  # accept cut-text from noVNC, set both the X CLIPBOARD and PRIMARY selections.
38
50
  Xvnc :1 \
39
51
  -localhost \
40
- -SecurityTypes VncAuth \
41
- -PasswordFile "$HOME/.vnc/passwd" \
52
+ "${VNC_SECURITY_ARGS[@]}" \
42
53
  -geometry 1280x800 \
43
54
  -depth 24 \
44
55
  -AlwaysShared \
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env bash
2
+ # agentbox: resolve the Chromium that agent-browser drives.
3
+ #
4
+ # We deliberately do NOT bake a Chromium into the box image. Playwright pins an
5
+ # exact Chromium build per playwright version, so a baked browser goes stale the
6
+ # moment a project pins a different Playwright — the project's `playwright
7
+ # install` then fetches a *different* build, and anything waiting on the baked
8
+ # one (a hard-coded ms-playwright path) hangs forever. Instead we reuse the
9
+ # *project's* Playwright Chromium — the exact build its own tests use — so
10
+ # agent-browser and Playwright share a single binary that is always correct.
11
+ #
12
+ # `AGENT_BROWSER_EXECUTABLE_PATH=/usr/local/bin/chromium` points at this script,
13
+ # so every agent-browser launch runs it. Resolution is effectively cached: once
14
+ # a build exists the hot path is just an `ls` + `exec` (no download).
15
+ #
16
+ # 1. newest installed Playwright Chromium under ~/.cache/ms-playwright; else
17
+ # 2. install one with the project's pinned Playwright (matching build), or the
18
+ # box's global Playwright as a fallback when the project has none; then
19
+ # 3. exec the real binary with the args agent-browser passed.
20
+ #
21
+ # Chrome-for-Testing has no linux/arm64 build and Noble's chromium apt package
22
+ # is a snap stub, so Playwright's downloader stays the only reliable cross-arch
23
+ # source — we just invoke it lazily, with the project's version, instead of
24
+ # baking a fixed one.
25
+
26
+ newest_chrome() {
27
+ ls -d "$HOME"/.cache/ms-playwright/chromium-*/chrome-linux*/chrome 2>/dev/null | sort -V | tail -1
28
+ }
29
+
30
+ real="$(newest_chrome)"
31
+ if [ -z "$real" ]; then
32
+ echo "agentbox: no Chromium yet; installing via Playwright (one-time)..." >&2
33
+ # Serialize concurrent first-launches: without a lock, two simultaneous
34
+ # agent-browser launches would both run `playwright install` into the same
35
+ # ~/.cache/ms-playwright, and one could exec a half-extracted binary. flock
36
+ # makes the second waiter re-check the cache and skip the redundant install.
37
+ mkdir -p "$HOME/.cache" 2>/dev/null
38
+ exec 9>"$HOME/.cache/agentbox-chromium-install.lock"
39
+ flock 9
40
+ real="$(newest_chrome)" # another launch may have installed it while we waited
41
+ if [ -z "$real" ]; then
42
+ if [ -x /workspace/node_modules/.bin/playwright ]; then
43
+ ( cd /workspace && ./node_modules/.bin/playwright install chromium ) >&2
44
+ else
45
+ playwright install chromium >&2
46
+ fi
47
+ real="$(newest_chrome)"
48
+ fi
49
+ flock -u 9
50
+ fi
51
+
52
+ if [ -z "$real" ] || [ ! -x "$real" ]; then
53
+ echo "agentbox: could not resolve a Chromium binary (Playwright install failed?)." >&2
54
+ exit 127
55
+ fi
56
+
57
+ exec "$real" "$@"
@@ -1,5 +1,6 @@
1
1
  {
2
- "$comment": "AgentBox enterprise-managed Claude Code settings, baked into the box image at /etc/claude-code/managed-settings.json. Highest precedence and NOT synced from the host ~/.claude, so claude-hooks-filter.ts never touches it; per Claude Code, hook arrays MERGE across settings sources, so the user's own hooks still run. These hooks report Claude's activity to the box supervisor (agentbox-ctl claude-state -> ctl socket -> relay -> ~/.agentbox/boxes/<id>/status.json) so `agentbox status/list/inspect` can show it even when the box is paused. Each command is exit-0 fast and shell-backgrounded so a hook can never block or fail a Claude turn. The ExitPlanMode / AskUserQuestion entries run SYNCHRONOUSLY (no &) because they consume the hook's stdin payload; the catchall PreToolUse 'working' hook races with them, but the supervisor's sticky-state semantics swallow that race (a 'working' set while in end-plan/question is ignored unless --clear-pending is set, which only the matching PostToolUse hook passes).",
2
+ "$comment": "AgentBox enterprise-managed Claude Code settings, baked into the box image at /etc/claude-code/managed-settings.json. Highest precedence and NOT synced from the host ~/.claude, so claude-hooks-filter.ts never touches it; per Claude Code, hook arrays MERGE across settings sources, so the user's own hooks still run. These hooks report Claude's activity to the box supervisor (agentbox-ctl claude-state -> ctl socket -> relay -> ~/.agentbox/boxes/<id>/status.json) so `agentbox status/list/inspect` can show it even when the box is paused. Each command is exit-0 fast and shell-backgrounded so a hook can never block or fail a Claude turn. The ExitPlanMode / AskUserQuestion entries run SYNCHRONOUSLY (no &) because they consume the hook's stdin payload; the catchall PreToolUse 'working' hook races with them, but the supervisor's sticky-state semantics swallow that race (a 'working' set while in end-plan/question is ignored unless --clear-pending is set, which only the matching PostToolUse hook passes). skipDangerousModePermissionPrompt pre-accepts the bypass-permissions disclaimer at policy precedence — AgentBox boxes are isolated, and `agentbox claude` defaults to --dangerously-skip-permissions, so the one-time accept gate would just trap every fresh box.",
3
+ "skipDangerousModePermissionPrompt": true,
3
4
  "hooks": {
4
5
  "UserPromptSubmit": [
5
6
  {
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env bash
2
+ # Pre-`docker commit` cleanup: strip ephemeral / disposable state so the
3
+ # captured checkpoint image is closer to "warm project state, nothing else".
4
+ #
5
+ # Invoked by the host via `docker exec --user root <container>
6
+ # /usr/local/bin/agentbox-checkpoint-cleanup` right before
7
+ # `docker commit`. Best-effort: every step is allowed to fail (a checkpoint
8
+ # capture should never block on cleanup hiccups).
9
+ #
10
+ # What we DELIBERATELY keep:
11
+ # - /workspace the actual point of the checkpoint
12
+ # - /home/vscode/.npm warm npm cache (next install is fast)
13
+ # - /home/vscode/.cache pnpm/yarn/Cargo/etc. caches
14
+ # - /var/lib/docker in-box dockerd's data root
15
+ # - /home/vscode/.claude the named volume is bind-mounted; image
16
+ # layer never sees it anyway
17
+ set +e
18
+
19
+ # apt: drop downloaded .deb cache and the package index. The index is ~50MB
20
+ # and gets refreshed on the next `apt-get update`; the .deb cache is reusable
21
+ # only if we don't change versions, which we usually do.
22
+ apt-get clean 2>/dev/null
23
+ rm -rf /var/lib/apt/lists/* 2>/dev/null
24
+
25
+ # Throwaway scratch dirs. Preserve /tmp/claude-* — that is the live in-box
26
+ # Claude Code session's working tree (its per-task stdout/stderr files). The
27
+ # agent that triggered this checkpoint *is* that session; deleting its task
28
+ # output mid-run makes its harness see ENOENT, treat the command as failed,
29
+ # and retry the checkpoint (observed: 5 duplicate auto-named checkpoints).
30
+ # Stale claude-* dirs baked into the image are tiny and Claude Code prunes
31
+ # them itself on the next session start.
32
+ find /tmp /var/tmp -mindepth 1 -maxdepth 1 ! -name 'claude-*' -exec rm -rf {} + 2>/dev/null
33
+
34
+ # Logs: truncate (don't delete) so the original file modes / ownerships stay
35
+ # intact for the next run. Targets common rotated archives too.
36
+ find /var/log -type f \( -name '*.log' -o -name '*.gz' -o -name '*.1' \) \
37
+ -exec truncate -s0 {} + 2>/dev/null
38
+ find /var/log/agentbox -type f -exec truncate -s0 {} + 2>/dev/null
39
+
40
+ # Bash history (root + vscode). Re-assert vscode ownership: `: >` run as root
41
+ # (re)creates the file root-owned 0644 when it didn't exist, which the uid-1000
42
+ # vscode user cannot append to, silently dropping all shell history.
43
+ : > /root/.bash_history 2>/dev/null
44
+ : > /home/vscode/.bash_history 2>/dev/null
45
+ chown vscode:vscode /home/vscode/.bash_history 2>/dev/null
46
+ chmod 600 /home/vscode/.bash_history 2>/dev/null
47
+
48
+ # Anthropic's installer writes a transient marker; redundant once the binary
49
+ # is in place. Safe to wipe.
50
+ rm -rf /home/vscode/.claude-installer 2>/dev/null
51
+
52
+ exit 0
@@ -0,0 +1,68 @@
1
+ {
2
+ "$comment": "Codex 0.134.0 expects `~/.codex/hooks.json` to be `{ hooks: { Event: [...] } }` (matching the `HooksFile` Rust struct), NOT a top-level event map. The `hooks` feature flag must also be enabled (`codex --enable hooks`) and hook trust must be either persisted via the in-TUI dialog or bypassed at launch (`--dangerously-bypass-hook-trust`). startCodexSession() does both. In practice the hook firing on the JSON-config path is still unreliable in 0.134.0 (TUI mode skips them on at least some startup paths) — the real mechanism that lights up state in production is the tmux-pane scraper in packages/ctl/src/codex-scraper.ts. These hooks remain as a defense-in-depth seed so any future codex build that fixes the firing also lights up state without further work.",
3
+ "hooks": {
4
+ "SessionStart": [
5
+ {
6
+ "hooks": [
7
+ { "type": "command", "command": "agentbox-ctl codex-state idle >/dev/null 2>&1 &", "timeout": 3 }
8
+ ]
9
+ }
10
+ ],
11
+ "UserPromptSubmit": [
12
+ {
13
+ "hooks": [
14
+ { "type": "command", "command": "agentbox-ctl codex-state working >/dev/null 2>&1 &", "timeout": 3 }
15
+ ]
16
+ }
17
+ ],
18
+ "PreToolUse": [
19
+ {
20
+ "hooks": [
21
+ { "type": "command", "command": "agentbox-ctl codex-state working >/dev/null 2>&1 &", "timeout": 3 }
22
+ ]
23
+ }
24
+ ],
25
+ "PermissionRequest": [
26
+ {
27
+ "hooks": [
28
+ { "type": "command", "command": "agentbox-ctl codex-state waiting >/dev/null 2>&1 &", "timeout": 3 }
29
+ ]
30
+ }
31
+ ],
32
+ "PreCompact": [
33
+ {
34
+ "hooks": [
35
+ { "type": "command", "command": "agentbox-ctl codex-state compacting >/dev/null 2>&1 &", "timeout": 3 }
36
+ ]
37
+ }
38
+ ],
39
+ "PostCompact": [
40
+ {
41
+ "hooks": [
42
+ { "type": "command", "command": "agentbox-ctl codex-state working >/dev/null 2>&1 &", "timeout": 3 }
43
+ ]
44
+ }
45
+ ],
46
+ "SubagentStart": [
47
+ {
48
+ "hooks": [
49
+ { "type": "command", "command": "agentbox-ctl codex-state working >/dev/null 2>&1 &", "timeout": 3 }
50
+ ]
51
+ }
52
+ ],
53
+ "SubagentStop": [
54
+ {
55
+ "hooks": [
56
+ { "type": "command", "command": "agentbox-ctl codex-state working >/dev/null 2>&1 &", "timeout": 3 }
57
+ ]
58
+ }
59
+ ],
60
+ "Stop": [
61
+ {
62
+ "hooks": [
63
+ { "type": "command", "command": "agentbox-ctl codex-state idle >/dev/null 2>&1 &", "timeout": 3 }
64
+ ]
65
+ }
66
+ ]
67
+ }
68
+ }
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env bash
2
+ # Routes in-box URL opens to `agentbox-ctl open`, which opens the link in the
3
+ # box's own Chromium (agent-browser, visible via `agentbox screen`) and asks
4
+ # the host user — in the footer/dashboard — whether to also open it on the
5
+ # host. This script is installed at /usr/local/bin (earlier in PATH than
6
+ # xdg-utils' /usr/bin/xdg-open, which it is also symlinked over) and is the
7
+ # box's $BROWSER, so `xdg-open`, Claude Code's OAuth flow, `gh`,
8
+ # `git web--browse`, python's webbrowser, etc. all land here.
9
+ #
10
+ # Only http(s) URLs are routed. Anything else (a file path, another scheme)
11
+ # falls through to the real xdg-open, which resolves it locally in the box.
12
+
13
+ set -uo pipefail
14
+
15
+ target="${1:-}"
16
+
17
+ case "$target" in
18
+ http://* | https://*)
19
+ exec agentbox-ctl open "$target"
20
+ ;;
21
+ *)
22
+ if [[ -x /usr/bin/xdg-open ]]; then
23
+ exec /usr/bin/xdg-open "$@"
24
+ fi
25
+ echo "agentbox-open: not an http(s) URL: $target" >&2
26
+ exit 1
27
+ ;;
28
+ esac
@@ -0,0 +1,263 @@
1
+ ---
2
+ name: agentbox-setup
3
+ description: Generate an agentbox.yaml for the current AgentBox workspace. Invoke when the user opens a sandbox without an agentbox.yaml or asks to (re)configure one.
4
+ ---
5
+
6
+ # /agentbox-setup
7
+
8
+ ## Box layout (what you're configuring against)
9
+
10
+ Your user is `vscode` and you can use `sudo` to run commands as root.
11
+
12
+ `/workspace` is where the user code lives, a per-box git worktree on a fresh `agentbox/<box-name>` branch (or a tar-piped copy of the host workspace for non-git projects).
13
+ Run `agentbox checkpoint --set-default` (similar to `docker commit`) to save any changes made to the system and workspace so that new boxes will start from a warm state. Everything is wiped on `agentbox destroy`.
14
+
15
+ Some special folders:
16
+
17
+ - **Host main repo's `.git/`** — If the box bind-mounts it RW at its identical absolute host path, in-box commits land on the host's branch refs (visible to `git log` on the host immediately); the box itself carries no SSH/git creds, so `git push` goes through the host relay (`agentbox-ctl git push`). The host's **working tree is never written to** — only refs/objects under `.git/`. GitHub PR ops (`agentbox-ctl git pr create|view|list|comment|review|merge|close|reopen|checkout`) flow the same way through host `gh`; write ops require host confirmation (deny → exit 10), `merge` and `checkout` have additional opt-in guards.
18
+ - **`~/.claude`** — and similar home folders for coding agents are seeded from the host's `~/.claude` on each create so auth, skills, and plugins persist without leaking the host's home dir.
19
+ - **`agentbox.yaml`** — read by `agentbox-ctl` from `/workspace`. Tasks and services declared here are what the supervisor will run.
20
+
21
+ Exposed ports and services:
22
+ - **portless** - every port with an `expose:` setting in agentbox.yaml is exposed not only as a local port but also under a special domain name `https://<name>.localhost` (so over HTTPS) via the `portless` CLI and proxy. It is also mapped to the host, where a `portless` proxy is running too, so users can reach the same service at the same-looking URL.
23
+ - **vnc** - the webVNC server exposed on 6080 is proxied to the host on a random port.
24
+ - **vscode** - the vscode server is proxied to the host on a random port.
25
+
26
+ ## Goal
27
+
28
+ Produce a `/workspace/agentbox.yaml` that captures this project's services, tasks, and box defaults so the in-box supervisor (`agentbox-ctl`) can boot the workspace deterministically.
29
+
30
+ `agentbox.yaml` is **declarative**. The supervisor reads it on box start, but you don't have to restart the box: after you write the file, `agentbox-ctl reload` (run from inside the box) makes the already-running supervisor re-read it and immediately run the declared tasks and autostart the services. See step 8.
31
+
32
+ ## 1. Discover the project
33
+
34
+ Look at `/workspace`:
35
+
36
+ - Top-level manifests: `package.json`, `pyproject.toml` / `requirements.txt`, `Cargo.toml`, `go.mod`, `Gemfile`, `composer.json`, `mix.exs`, etc. — these tell you the runtime.
37
+ - `docker-compose.yaml` / `docker-compose.yml` — often lists the real services the project expects.
38
+ - `package.json` → `scripts`: look for `dev`, `start`, `build`, `test`, `migrate`, `seed`.
39
+ - `Makefile` / `justfile` / `Taskfile.yaml` — alternative task runners.
40
+ - Listening ports: grep for `listen(`, `PORT=`, framework defaults (3000 for Next.js / Nuxt, 5173 for Vite, 8000 for Django, 8080 for Spring, etc.).
41
+ - Database / cache deps to spin up locally (Postgres, Redis, …) — declare them as services if the project doesn't expect them to be external.
42
+
43
+ ## 2. Pick services and tasks
44
+
45
+ - **Services** = long-running. Web servers, watchers, queue workers, databases. `restart: on-failure` by default.
46
+ - **Tasks** = one-shot. `pnpm install`, DB migrations, codegen, fixture loaders, install apt packages. Wire dependent services with `needs:` so they wait for the task to finish successfully.
47
+ - Names: must match `[A-Za-z0-9_-]+`. Task names and service names share a namespace — no collisions.
48
+ - No cycles in `needs:`.
49
+ - **Always generate a dependency-install task** and make it the root of the `needs:` graph (every service that needs deps gets `needs: [install, …]`). Future boxes start from a snapshot of the final filesystem so they won't need this, but updates or moving to a cloud provider might need to rebuild the container from scratch. The filesystem can be then later captured by `agentbox-ctl checkpoint --set-default`. The task must be **idempotent and self-healing**: `agentbox-ctl` re-runs pending tasks on every box stop/start (the daemon dies with the container and is relaunched), so a plain `rm -rf node_modules && install` would wipe + reinstall on every start. Guard the rebuild with a marker file *inside* `node_modules` (the `.agentbox-installed` convention AgentBox uses internally): rebuild only when the marker is absent (fresh box), and be a fast no-op once it exists. Detect the package manager from the lockfile — never hardcode `pnpm`. See the worked example below.
50
+ - **Add a comment to the beginning** of the file to explain what you did and what issues you encountered, so that future runs can use this information if the project evolves and the agentbox.yaml file needs updating.
51
+
52
+ ### Stateful services: data persistence & re-seeding (read this for databases)
53
+
54
+ **A checkpoint does NOT capture docker-in-docker data.** `agentbox checkpoint` is a `docker commit` of the box's writable filesystem (the system + `/workspace`). The in-box `dockerd` keeps its storage in a *separate* per-box volume (`/var/lib/docker`), which is **not** part of that image — it's fresh on every new box and wiped on `agentbox destroy`. So a database or cache you run as a **docker container** (e.g. `docker run … postgres`) starts **empty on every new box** created from a checkpoint (every `agentbox claude` / `agentbox create`), even though `/workspace` and any marker files you wrote were restored. (A DB run as a **native process** with its data dir on the box filesystem — e.g. `postgres -D /var/lib/postgresql/data` — *is* captured by the checkpoint, since it lives in the writable layer.)
55
+
56
+ **Consequence for migrate/seed tasks of a containerized DB: do not gate them on a filesystem marker.** A marker like `node_modules/.agentbox-installed` is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. Instead, **gate on the actual data** — connect to the DB and check whether a sentinel table/row exists, and seed only when it's missing:
57
+
58
+ ```yaml
59
+ seed:
60
+ # Re-seed when the DB is empty. The postgres data lives in the in-box
61
+ # docker volume, which is NOT captured by `agentbox checkpoint` — so a box
62
+ # started from a checkpoint has the workspace warm but an empty DB. We can't
63
+ # use a filesystem marker here (it would be restored while the DB is blank);
64
+ # instead probe the DB and seed only if the data is absent. Fast no-op once
65
+ # the data is present.
66
+ command: |
67
+ set -e
68
+ export PGPASSWORD=postgres
69
+ # Probe for existing data. If the table is missing the query errors,
70
+ # stderr is suppressed, stdout is empty, the grep fails — so we seed.
71
+ if psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \
72
+ "SELECT EXISTS (SELECT 1 FROM users LIMIT 1)" 2>/dev/null | grep -q t; then
73
+ echo "data present — skip seed"
74
+ exit 0
75
+ fi
76
+ pnpm db:seed
77
+ needs: [install, migrate]
78
+ ```
79
+
80
+ **Lifecycle nuance (this is why the data check, not a marker, is right):**
81
+
82
+ - **Box stop → start** (`agentbox stop`/`start`): the supervisor daemon dies with the container and is relaunched, so it **re-runs all tasks** from `pending`. The per-box docker volume *does* survive stop/start, so the DB still has data — the data check makes the seed a fast no-op.
83
+ - **New box from a checkpoint** (`agentbox claude`/`create`): tasks run and the DB volume is empty → the check fails → the seed runs. Correct.
84
+ - **Resume after pause** (`agentbox pause`/`unpause`): the daemon is frozen and thawed, **not** restarted, so tasks do **not** re-run at all — nothing to seed, the running DB is untouched.
85
+
86
+ (Migrations are usually safe to re-run as-is: migration tools track applied migrations in their own table, which on a fresh box is empty, so they simply re-apply. Only the *data* seed needs the existence check.) Install the DB client the seed/migrate tasks need (e.g. `postgresql-client`) in the `install` task — don't `docker exec` the DB container for these checks (nested `docker exec` fails inside a box with a `setns` error); reach it over TCP with the client tools instead.
87
+
88
+ ## 3. Wire readiness probes (services only)
89
+
90
+ `ready_when:` lets the supervisor decide when a service is "ready" (vs. just "running"). Exactly one of these must be present:
91
+
92
+ - `port: 3000` — TCP connect (default host `127.0.0.1`; override with `host:`).
93
+ - `log_match: "Listening on"` — regex matched against stdout/stderr. First match flips the service to ready.
94
+ - `http: "http://127.0.0.1:3000/health"` — GET probe. Optional `expect_status: 200` (default: any 2xx).
95
+
96
+ Tunables: `interval_ms` (default 500), `initial_delay_ms` (default 0), `timeout_ms` (default 60000), `on_timeout: kill | mark_unhealthy` (default `kill` — re-enters the restart policy).
97
+
98
+ ### Mark the web service with `expose:`
99
+
100
+ The box's primary web app (the dev server / Next.js / API the user opens in a browser) should declare:
101
+
102
+ ```yaml
103
+ expose:
104
+ port: 3000 # the port this service listens on inside the box
105
+ as: 80 # must be 80 — the container port AgentBox publishes
106
+ ```
107
+
108
+ At most **one** service may set `expose:`. AgentBox forwards container `:80` to `127.0.0.1:<port>` and publishes it on the host with `portless` proxy to a <boxname>.localhost url, so `agentbox list`/`status` show it as the box's main URL on every engine (no OrbStack dependency). Set this on the same service whose `ready_when:` you just wrote (a DB or worker should **not** get `expose:`).
109
+
110
+ ## 4. Restart + backoff
111
+
112
+ Per service:
113
+
114
+ - `restart: always | on-failure | never` (default `on-failure`).
115
+ - `backoff:` — `initial_ms` (default 500), `max_ms` (default 30000; must be `>= initial_ms`), `factor` (default 2).
116
+
117
+ ## 5. (Optional) `defaults:` block
118
+
119
+ Sets per-project defaults for `agentbox create`/`claude`/`code`/`shell` — same shape as `~/.agentbox/config.yaml`. CLI flags still override. Common keys:
120
+
121
+ - `box.hostSnapshot` (bool) — APFS-clone the *host* workspace into a per-box scratch dir before seeding `/workspace` (stabilizes the tar-pipe source).
122
+ - `box.defaultCheckpoint` (string) — checkpoint new boxes start from (normally you set this via `agentbox-ctl checkpoint --set-default` at the end of setup — see section 9, not by hand).
123
+ - `box.withPlaywright` (bool) — install `@playwright/cli` globally inside the box.
124
+ - `box.vnc` (bool) — run Xvnc + noVNC on container port 6080.
125
+ - `box.isolateClaudeConfig` (bool) — per-box `~/.claude` volume instead of the shared one.
126
+ - `code.ide` — `vscode | cursor | auto`.
127
+ - `code.autoTerminals` (bool) — auto-generate `.vscode/tasks.json` with per-service tails.
128
+ - `browser.default` — `agent-browser | playwright | both`.
129
+
130
+ Full key list (run on the host): `agentbox config list --keys`.
131
+
132
+ ## 6. Worked example
133
+
134
+ ```yaml
135
+ # yaml-language-server: $schema=https://agent-box.sh/schema/agentbox.schema.json
136
+ # This agentbox.yaml sets up this Next.js project, and includes:
137
+ # - a postgres database because it's used in the project
138
+ # - an inngest server for queues
139
+ # - a fix to move .turbo/cache folder to the workspace to avoid a permission error during setup
140
+ # - ...
141
+ defaults:
142
+ box:
143
+ withPlaywright: true
144
+ code:
145
+ ide: cursor
146
+
147
+ tasks:
148
+ # Idempotent install. /workspace is the container's writable filesystem, so
149
+ # node_modules persists across pause/stop/start and is captured by
150
+ # `agentbox checkpoint`. The host's node_modules is macOS-native and is
151
+ # never copied in, so force a clean Linux build the first time — but skip
152
+ # on every subsequent box start (agentbox-ctl re-runs pending tasks after
153
+ # stop/start). Adjust the lockfile detection to the project's package
154
+ # manager.
155
+ install:
156
+ command: |
157
+ set -e
158
+ MARKER=node_modules/.agentbox-installed
159
+ [ -f "$MARKER" ] && { echo "deps installed (marker present) — skip"; exit 0; }
160
+ apt-get update && apt-get install -y postgresql-client
161
+ rm -rf node_modules
162
+ if [ -f pnpm-lock.yaml ]; then
163
+ corepack enable >/dev/null 2>&1 || true
164
+ pnpm install --frozen-lockfile || pnpm install
165
+ fi
166
+ touch "$MARKER"
167
+
168
+ migrate:
169
+ command: pnpm db:migrate
170
+ needs: [install]
171
+
172
+ services:
173
+ postgres:
174
+ command: postgres -D /var/lib/postgresql/data
175
+ ready_when:
176
+ port: 5432
177
+ restart: always
178
+
179
+ dev:
180
+ command: pnpm dev
181
+ needs: [install, migrate, postgres]
182
+ ready_when:
183
+ port: 3000
184
+ timeout_ms: 120000
185
+ expose:
186
+ port: 3000
187
+ as: 80
188
+ restart: on-failure
189
+ backoff:
190
+ initial_ms: 500
191
+ max_ms: 5000
192
+ factor: 2
193
+ ```
194
+
195
+ ## 6b. Bringing extra host files/folders into the box
196
+
197
+ Two ways to copy host files in (both COPY — never a live mount, so the box can't
198
+ write back to the host):
199
+
200
+ - **`carry:` block** (declarative, in `agentbox.yaml`) — for files/dirs every box
201
+ should get at create time. Each entry is `{ src, dest }` with optional `mode`,
202
+ `user`, `optional`, and `exclude:` (a list of tar globs / bare dir names to drop
203
+ when copying a directory). Heavy regenerable dirs (`.git`, `node_modules`, `bin`,
204
+ `obj`, `packages`, `dist`, `.next`, `target`) are dropped by default; `exclude:`
205
+ is additive. Each carry entry is capped at `box.cpMaxBytes` (default 100 MiB
206
+ after excludes) — the same limit `agentbox cp` enforces.
207
+ - **`agentbox-ctl cp fromHost <hostPath> <boxPath>`** (ad-hoc, from inside the box)
208
+ — for a one-off copy. Prompts the user on the host to approve.
209
+
210
+ **The per-copy size limit (important for large/legacy folders).** A single copy is
211
+ blocked above `box.cpMaxBytes` (default **100 MB**) *after* default excludes, so it
212
+ fails loud instead of silently hanging. When blocked you get a `du`-style tree of
213
+ the biggest remaining folders/subfolders. To get under the limit, EITHER:
214
+
215
+ - **drop what the box can regenerate** (the default excludes already remove
216
+ `node_modules`/`.git`/build output; add more with `--exclude=<glob-or-name>`), OR
217
+ - **copy the heavy folders one at a time** so each copy is under the limit, OR
218
+ - pass `--yes` to copy the whole thing anyway (only when you really need it all).
219
+
220
+ Example: a 2.4 GB legacy folder is mostly `packages/` (NuGet) + `.git`; those are
221
+ excluded by default, and what's left can be split:
222
+ `agentbox-ctl cp fromHost ../legacy/src /workspace/legacy/src` then
223
+ `... cp fromHost ../legacy/Database /workspace/legacy/Database`.
224
+
225
+ ## 7. Validate before handing off
226
+
227
+ - Run `agentbox-ctl reload` and then `agentbox-ctl status` to check that everything is running as expected.
228
+ - Every name in `needs:` must reference an existing task or service.
229
+ - A service with `restart: never` and an autostart dependency will block the dependent forever after one failed run — usually a mistake.
230
+ - `command:` is either a shell string (run via `bash -c`) or an argv array. Use the argv form if you need to avoid shell quoting.
231
+
232
+ ## 8. Hand-off
233
+
234
+ Tell the user (verbatim):
235
+
236
+ ```
237
+ █████╗ ██████╗ ███████╗███╗ ██╗████████╗██████╗ ██████╗ ██╗ ██╗
238
+ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝██╔══██╗██╔═══██╗╚██╗██╔╝
239
+ ███████║██║ ███╗█████╗ ██╔██╗ ██║ ██║ ██████╔╝██║ ██║ ╚███╔╝
240
+ ██╔══██║██║ ██║██╔══╝ ██║╚██╗██║ ██║ ██╔══██╗██║ ██║ ██╔██╗
241
+ ██║ ██║╚██████╔╝███████╗██║ ╚████║ ██║ ██████╔╝╚██████╔╝██╔╝ ██╗
242
+ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝
243
+ ```
244
+
245
+ your box is ready, you can start more sessions with `agentbox claude`
246
+ you can access the web app at https://<boxname>.localhost
247
+
248
+
249
+ ## 9. Checkpoint the warm state - DON'T SKIP THIS STEP
250
+
251
+ Checkpoint (snapshot) this box's writable layer: once the box is warmed up (deps installed, services ready), checkpoint it with `agentbox-ctl checkpoint --name setup --replace --set-default` so future boxes start ready.
252
+ Remember the checkpoint captures the writable layer (`/workspace` + system), **not** docker-in-docker volumes — so a containerized DB's data does not carry into new boxes. That's expected; the data-existence-gated seed task from section 2 re-seeds those automatically. (If you need the data itself to persist into new boxes, run the DB as a native process with its data dir on the box filesystem, or bind a `/workspace` path as the container's data volume so it lands in the checkpoint.)
253
+ Run this command exactly once. The `--name setup --replace` makes it idempotent — if it ever needs to run again it overwrites the existing `setup` checkpoint instead of stacking duplicates.
254
+ On all providers except Vercel, this doesn't need to be confirmed by the user. It will pause the container for several seconds so warn the user about it and write Done when it's done.
255
+ On Vercel: this actually STOPS the sandbox, so warn the user about it. The system will also ask the user for confirmation.
256
+
257
+ ## 10. Known issues
258
+
259
+ - For Next.js/Vite/TanStack projects, make sure to also forward the WebSocket connection so that hot reload works.
260
+
261
+ - Services such as Flask and Next.js, and URL settings such as `BETTER_AUTH_URL` and `NEXT_PUBLIC_APP_URL`, should use the `<boxname>.localhost` URL for local development, so that the host and the box use the same URL.
262
+
263
+ - The `install` task is intentionally a no-op once `node_modules/.agentbox-installed` exists. Do **not** remove the marker guard to "force a fresh install" — that reinstalls on every box start. To force a one-off rebuild, delete `node_modules` (or just the marker) then run `agentbox-ctl reload`.
@@ -0,0 +1,102 @@
1
#!/usr/bin/env bash
# Start the per-box VNC stack: Xvnc on :1 (loopback inside container) +
# websockify on 0.0.0.0:6080 serving noVNC's HTML5 client and proxying RFB.
# Launched by the host via `docker exec -d --user vscode` after the container
# is up. Idempotent — re-running while the daemons are alive is a no-op, so
# `agentbox start` can blindly call us again. If only one of the two daemons
# is alive, only the missing one is (re)started.
#
# Environment:
#   AGENTBOX_VNC_PASSWORD  required, non-empty; the VNC password to install.
#
# Exit status:
#   0   both daemons were already running, or the missing ones were launched
#   64  AGENTBOX_VNC_PASSWORD is unset or empty
#   65  noVNC static assets were not found in any known location
#   *   any other failing step (set -e), e.g. vncpasswd or mv failing

set -euo pipefail

PASS="${AGENTBOX_VNC_PASSWORD:-}"
if [[ -z "$PASS" ]]; then
  echo "agentbox-vnc-start: AGENTBOX_VNC_PASSWORD is not set" >&2
  exit 64
fi

SELF_UID="$(id -u)"

# True when an Xvnc process owned by the current user exists.
xvnc_alive() {
  pgrep -u "$SELF_UID" -x Xvnc >/dev/null
}

# True when a websockify process for port 6080 owned by the current user exists.
websockify_alive() {
  pgrep -u "$SELF_UID" -f "websockify.*6080" >/dev/null
}

if xvnc_alive && websockify_alive; then
  exit 0
fi

mkdir -p "$HOME/.vnc"
# vncpasswd's -f mode reads plaintext on stdin, writes the DES blob to stdout.
# VncAuth truncates >8 chars at compare time, which is fine — the host writes
# an 8-char password. Write to a temp file + rename so a failure (e.g.,
# vncpasswd erroring out) doesn't leave an empty file that Xvnc would then
# reject. The EXIT trap removes the temp file if we abort before the rename,
# so failed runs don't accumulate passwd.XXXXXX leftovers.
#
# Debian 12 (E2B base) doesn't package vncpasswd at all — tigervnc-tools is
# Ubuntu-only. When vncpasswd is missing, fall back to `-SecurityTypes None`
# and rely on the cloud provider's signed preview URL as the access boundary
# (same effective model: holding the URL = holding the credential). Other
# providers (docker, hetzner, daytona, vercel) keep VncAuth.
TMP_PASSWD=""
trap '[[ -z "$TMP_PASSWD" ]] || rm -f -- "$TMP_PASSWD"' EXIT

VNC_SECURITY_ARGS=(-SecurityTypes VncAuth -PasswordFile "$HOME/.vnc/passwd")
if command -v vncpasswd >/dev/null 2>&1; then
  TMP_PASSWD="$(mktemp "$HOME/.vnc/passwd.XXXXXX")"
  printf '%s\n' "$PASS" | vncpasswd -f > "$TMP_PASSWD"
  chmod 600 "$TMP_PASSWD"
  mv "$TMP_PASSWD" "$HOME/.vnc/passwd"
  TMP_PASSWD=""  # renamed into place; nothing left for the trap to remove
else
  echo "agentbox-vnc-start: vncpasswd not installed; starting Xvnc with -SecurityTypes None (preview URL is the access boundary)" >&2
  VNC_SECURITY_ARGS=(-SecurityTypes None)
fi

# Best-effort: the directory may already exist or be pre-created by the image.
mkdir -p /var/log/agentbox 2>/dev/null || true

# Xvnc on display :1, loopback-only (websockify is the only public ingress).
# 1280x800x24 is a sensible laptop-browser viewport.
# The clipboard params are on-by-default in TigerVNC 1.13 but pinned here so a
# base-image bump can't silently break host->box paste, and to document intent:
# accept cut-text from noVNC, set both the X CLIPBOARD and PRIMARY selections.
# Skipped when Xvnc is already alive: a second instance could not claim :1
# anyway, and its `>` redirect would truncate the running daemon's log.
if ! xvnc_alive; then
  Xvnc :1 \
    -localhost \
    "${VNC_SECURITY_ARGS[@]}" \
    -geometry 1280x800 \
    -depth 24 \
    -AlwaysShared \
    -AcceptCutText=1 \
    -SendCutText=1 \
    -SetPrimary=1 \
    -SendPrimary=1 \
    >/var/log/agentbox/xvnc.log 2>&1 &
fi

# Wait for Xvnc's RFB socket (5901). bash's /dev/tcp pseudo-device makes the
# probe a one-liner without needing netcat in the image. Up to 50 probes,
# 0.1s apart.
XVNC_READY=0
for (( attempt = 0; attempt < 50; attempt++ )); do
  if (echo > /dev/tcp/127.0.0.1/5901) 2>/dev/null; then
    XVNC_READY=1
    break
  fi
  sleep 0.1
done
if (( ! XVNC_READY )); then
  # Not fatal: the remaining steps still run exactly as they would have before;
  # we only make the timeout visible instead of passing over it silently.
  echo "agentbox-vnc-start: Xvnc did not open 127.0.0.1:5901 in time; continuing (see /var/log/agentbox/xvnc.log)" >&2
fi

# With no window manager, nothing owns the X selections, so Xvnc's RFB cut-text
# isn't reliably handed to Chromium on Ctrl+V. autocutsel (one daemon per
# selection) keeps CLIPBOARD and PRIMARY populated and synced. Best-effort: a
# clipboard failure must never abort VNC, and the pgrep guard keeps a stray
# re-entry from spawning duplicates.
if ! pgrep -u "$SELF_UID" -x autocutsel >/dev/null; then
  DISPLAY=:1 autocutsel -selection CLIPBOARD -fork >/dev/null 2>&1 || true
  DISPLAY=:1 autocutsel -selection PRIMARY -fork >/dev/null 2>&1 || true
fi

# Skipped when websockify is already alive: a second instance could not bind
# 6080 and would only truncate the running daemon's log.
if ! websockify_alive; then
  # noVNC's static assets live at different paths per base image: Debian/Ubuntu
  # (docker, hetzner) ship them at /usr/share/novnc via apt; the AL2023 bake
  # (vercel) git-clones them to /usr/local/share/novnc. websockify runs
  # os.chdir(--web) at startup, so a wrong path makes it FileNotFoundError and
  # never bind 6080 — pick the first dir that exists.
  NOVNC_WEB=""
  for _d in /usr/share/novnc /usr/local/share/novnc; do
    if [[ -d "$_d" ]]; then
      NOVNC_WEB="$_d"
      break
    fi
  done
  if [[ -z "$NOVNC_WEB" ]]; then
    echo "agentbox-vnc-start: noVNC assets not found (looked in /usr/share/novnc, /usr/local/share/novnc)" >&2
    exit 65
  fi

  # websockify serves noVNC at /vnc.html (--web) and tunnels WS frames to Xvnc's
  # RFB. Bind 0.0.0.0:6080 so both Docker `-p` mappings and OrbStack's
  # <name>.orb.local routing reach it.
  websockify \
    --web="$NOVNC_WEB" \
    0.0.0.0:6080 \
    127.0.0.1:5901 \
    >/var/log/agentbox/websockify.log 2>&1 &
fi

# Detach the background daemons from this shell's job table so they outlive it.
disown -a