create-merlin-brain 3.11.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.cjs +156 -32
- package/bin/runtime-adapters.cjs +396 -0
- package/dist/server/api/types.d.ts +7 -0
- package/dist/server/api/types.d.ts.map +1 -1
- package/dist/server/cost/tracker.d.ts +38 -2
- package/dist/server/cost/tracker.d.ts.map +1 -1
- package/dist/server/cost/tracker.js +87 -15
- package/dist/server/cost/tracker.js.map +1 -1
- package/dist/server/server.d.ts.map +1 -1
- package/dist/server/server.js +74 -30
- package/dist/server/server.js.map +1 -1
- package/dist/server/tools/__tests__/augmentation.test.d.ts +8 -0
- package/dist/server/tools/__tests__/augmentation.test.d.ts.map +1 -0
- package/dist/server/tools/__tests__/augmentation.test.js +76 -0
- package/dist/server/tools/__tests__/augmentation.test.js.map +1 -0
- package/dist/server/tools/__tests__/route-helpers.test.d.ts +5 -0
- package/dist/server/tools/__tests__/route-helpers.test.d.ts.map +1 -0
- package/dist/server/tools/__tests__/route-helpers.test.js +49 -0
- package/dist/server/tools/__tests__/route-helpers.test.js.map +1 -0
- package/dist/server/tools/adaptive.js +1 -1
- package/dist/server/tools/adaptive.js.map +1 -1
- package/dist/server/tools/agent-spawn.d.ts +25 -0
- package/dist/server/tools/agent-spawn.d.ts.map +1 -0
- package/dist/server/tools/agent-spawn.js +95 -0
- package/dist/server/tools/agent-spawn.js.map +1 -0
- package/dist/server/tools/agents-index.js +3 -3
- package/dist/server/tools/agents-index.js.map +1 -1
- package/dist/server/tools/agents.js +5 -5
- package/dist/server/tools/agents.js.map +1 -1
- package/dist/server/tools/augmentation.d.ts +45 -0
- package/dist/server/tools/augmentation.d.ts.map +1 -0
- package/dist/server/tools/augmentation.js +167 -0
- package/dist/server/tools/augmentation.js.map +1 -0
- package/dist/server/tools/behaviors.js +4 -4
- package/dist/server/tools/behaviors.js.map +1 -1
- package/dist/server/tools/context.js +7 -7
- package/dist/server/tools/context.js.map +1 -1
- package/dist/server/tools/cost.d.ts +3 -1
- package/dist/server/tools/cost.d.ts.map +1 -1
- package/dist/server/tools/cost.js +66 -13
- package/dist/server/tools/cost.js.map +1 -1
- package/dist/server/tools/discoveries.js +6 -6
- package/dist/server/tools/discoveries.js.map +1 -1
- package/dist/server/tools/index.d.ts +4 -0
- package/dist/server/tools/index.d.ts.map +1 -1
- package/dist/server/tools/index.js +4 -0
- package/dist/server/tools/index.js.map +1 -1
- package/dist/server/tools/learning.d.ts +12 -0
- package/dist/server/tools/learning.d.ts.map +1 -0
- package/dist/server/tools/learning.js +269 -0
- package/dist/server/tools/learning.js.map +1 -0
- package/dist/server/tools/project.js +7 -7
- package/dist/server/tools/project.js.map +1 -1
- package/dist/server/tools/promote.d.ts +11 -0
- package/dist/server/tools/promote.d.ts.map +1 -0
- package/dist/server/tools/promote.js +315 -0
- package/dist/server/tools/promote.js.map +1 -0
- package/dist/server/tools/route-helpers.d.ts +45 -0
- package/dist/server/tools/route-helpers.d.ts.map +1 -0
- package/dist/server/tools/route-helpers.js +93 -0
- package/dist/server/tools/route-helpers.js.map +1 -0
- package/dist/server/tools/route.d.ts +4 -3
- package/dist/server/tools/route.d.ts.map +1 -1
- package/dist/server/tools/route.js +80 -284
- package/dist/server/tools/route.js.map +1 -1
- package/dist/server/tools/session-restore.d.ts +18 -0
- package/dist/server/tools/session-restore.d.ts.map +1 -0
- package/dist/server/tools/session-restore.js +154 -0
- package/dist/server/tools/session-restore.js.map +1 -0
- package/dist/server/tools/session-search.d.ts +16 -0
- package/dist/server/tools/session-search.d.ts.map +1 -0
- package/dist/server/tools/session-search.js +240 -0
- package/dist/server/tools/session-search.js.map +1 -0
- package/dist/server/tools/sights-index.js +2 -2
- package/dist/server/tools/sights-index.js.map +1 -1
- package/dist/server/tools/smart-route.d.ts.map +1 -1
- package/dist/server/tools/smart-route.js +4 -5
- package/dist/server/tools/smart-route.js.map +1 -1
- package/dist/server/tools/verification.js +1 -1
- package/dist/server/tools/verification.js.map +1 -1
- package/files/agents/code-organization-supervisor.md +1 -0
- package/files/agents/context-guardian.md +1 -0
- package/files/agents/docs-keeper.md +1 -0
- package/files/agents/dry-refactor.md +1 -0
- package/files/agents/elite-code-refactorer.md +1 -0
- package/files/agents/hardening-guard.md +1 -0
- package/files/agents/implementation-dev.md +1 -0
- package/files/agents/merlin-access-control-reviewer.md +248 -0
- package/files/agents/merlin-codebase-mapper.md +1 -1
- package/files/agents/merlin-dependency-auditor.md +216 -0
- package/files/agents/merlin-executor.md +1 -0
- package/files/agents/merlin-input-validator.md +247 -0
- package/files/agents/merlin-reviewer.md +1 -0
- package/files/agents/merlin-sast-reviewer.md +182 -0
- package/files/agents/merlin-secret-scanner.md +203 -0
- package/files/agents/tests-qa.md +1 -0
- package/files/commands/merlin/execute-phase.md +94 -197
- package/files/commands/merlin/execute-plan.md +116 -180
- package/files/commands/merlin/health.md +385 -0
- package/files/commands/merlin/loop-recipes.md +93 -36
- package/files/commands/merlin/optimize-prompts.md +158 -0
- package/files/commands/merlin/profiles.md +215 -0
- package/files/commands/merlin/promote.md +176 -0
- package/files/commands/merlin/quick.md +229 -0
- package/files/commands/merlin/resume-work.md +27 -1
- package/files/commands/merlin/route.md +43 -1
- package/files/commands/merlin/sandbox.md +359 -0
- package/files/commands/merlin/usage.md +55 -0
- package/files/docker/Dockerfile.merlin +20 -0
- package/files/docker/docker-compose.merlin.yml +23 -0
- package/files/hook-templates/auto-commit.sh +64 -0
- package/files/hook-templates/auto-format.sh +95 -0
- package/files/hook-templates/auto-test.sh +117 -0
- package/files/hook-templates/branch-protection.sh +72 -0
- package/files/hook-templates/changelog-reminder.sh +76 -0
- package/files/hook-templates/complexity-check.sh +112 -0
- package/files/hook-templates/import-audit.sh +83 -0
- package/files/hook-templates/license-header.sh +84 -0
- package/files/hook-templates/pr-description.sh +100 -0
- package/files/hook-templates/todo-tracker.sh +80 -0
- package/files/hooks/check-file-size.sh +17 -4
- package/files/hooks/config-change.sh +44 -16
- package/files/hooks/instructions-loaded.sh +22 -5
- package/files/hooks/notify-desktop.sh +157 -0
- package/files/hooks/notify-webhook.sh +141 -0
- package/files/hooks/pre-edit-sights-check.sh +76 -9
- package/files/hooks/security-scanner.sh +153 -0
- package/files/hooks/session-end-memory-sync.sh +97 -0
- package/files/hooks/session-end.sh +274 -1
- package/files/hooks/session-start.sh +19 -6
- package/files/hooks/smart-approve.sh +270 -0
- package/files/hooks/teammate-idle-verify.sh +87 -12
- package/files/hooks/worktree-create.sh +20 -3
- package/files/hooks/worktree-remove.sh +21 -3
- package/files/merlin/references/plan-format.md +37 -9
- package/files/merlin/sandbox.json +9 -0
- package/files/merlin/security.json +11 -0
- package/files/merlin/templates/ci/docs-update.yml +81 -0
- package/files/merlin/templates/ci/pr-review.yml +50 -0
- package/files/merlin/templates/ci/security-audit.yml +74 -0
- package/files/merlin/templates/config.json +9 -1
- package/files/rules/api-rules.md +30 -0
- package/files/rules/frontend-rules.md +25 -0
- package/files/rules/hooks-rules.md +36 -0
- package/files/rules/mcp-rules.md +30 -0
- package/files/rules/worker-rules.md +29 -0
- package/package.json +5 -2
|
@@ -34,12 +34,14 @@ Both modes spawn a fresh process. The difference is whether the orchestrator gat
|
|
|
34
34
|
Extract from $ARGUMENTS:
|
|
35
35
|
- **agent-name**: First word (e.g., `product-spec`, `implementation-dev`)
|
|
36
36
|
- **task-description**: Everything after the first word (may be quoted)
|
|
37
|
+
- **--sandbox**: Optional flag — if present, run the agent inside the Docker sandbox
|
|
37
38
|
|
|
38
39
|
If no arguments provided:
|
|
39
40
|
```
|
|
40
|
-
Usage: /merlin:route <agent-name> "task description"
|
|
41
|
+
Usage: /merlin:route <agent-name> "task description" [--sandbox]
|
|
41
42
|
|
|
42
43
|
Example: /merlin:route product-spec "turn this into a spec: user wants SSO login"
|
|
44
|
+
Example: /merlin:route implementation-dev "refactor auth module" --sandbox
|
|
43
45
|
```
|
|
44
46
|
List available agents and exit.
|
|
45
47
|
|
|
@@ -173,6 +175,8 @@ Also write a detailed result to: {RESULT_FILE}
|
|
|
173
175
|
|
|
174
176
|
This is the critical step. ALWAYS use `claude --agent` via Bash for true process isolation.
|
|
175
177
|
|
|
178
|
+
**Standard spawn (no --sandbox):**
|
|
179
|
+
|
|
176
180
|
```bash
|
|
177
181
|
# Spawn fresh Claude with the agent's system prompt and our handoff.
|
|
178
182
|
# Unset CLAUDECODE so the child doesn't reject as "nested session" —
|
|
@@ -186,6 +190,41 @@ RESULT=$(unset CLAUDECODE && cat "$HANDOFF_FILE" | claude \
|
|
|
186
190
|
EXIT_CODE=$?
|
|
187
191
|
```
|
|
188
192
|
|
|
193
|
+
**Sandbox spawn (--sandbox flag present):**
|
|
194
|
+
|
|
195
|
+
When `--sandbox` is detected in $ARGUMENTS, route through Docker instead:
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
# Verify Docker is available
|
|
199
|
+
docker --version 2>/dev/null || {
|
|
200
|
+
echo "Docker not installed. Cannot use --sandbox. Run /merlin:sandbox for setup."
|
|
201
|
+
exit 1
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
# Verify image exists
|
|
205
|
+
docker images merlin-sandbox:latest -q 2>/dev/null | grep -q . || {
|
|
206
|
+
echo "Sandbox image not built. Run /merlin:sandbox build first."
|
|
207
|
+
exit 1
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
# Spawn agent inside Docker sandbox
|
|
211
|
+
# Project is mounted read-only. Agent writes results to /workspace-output.
|
|
212
|
+
# -i keeps the container's stdin open; without it the redirected handoff file never reaches `cat` and /tmp/handoff.md ends up empty.
RESULT=$(docker run --rm -i \
|
|
213
|
+
--network none \
|
|
214
|
+
--cpus="2" \
|
|
215
|
+
--memory="4g" \
|
|
216
|
+
-v "$(pwd):/workspace:ro" \
|
|
217
|
+
-v "merlin-output:/workspace-output" \
|
|
218
|
+
-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}" \
|
|
219
|
+
-e "MERLIN_API_KEY=${MERLIN_API_KEY:-}" \
|
|
220
|
+
merlin-sandbox:latest \
|
|
221
|
+
/bin/sh -c "cat > /tmp/handoff.md && unset CLAUDECODE && claude --agent {agent-name} -p --permission-mode acceptEdits --output-format text < /tmp/handoff.md" \
|
|
222
|
+
< "$HANDOFF_FILE" 2>&1)
|
|
223
|
+
EXIT_CODE=$?
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Note: In sandbox mode, the agent writes code to `/workspace-output`, not to the project directory. Inform the user they need to copy results out manually or use `/merlin:sandbox run` for a more guided experience.
|
|
227
|
+
|
|
189
228
|
**Parameters explained:**
|
|
190
229
|
- `--agent {agent-name}`: Loads agent system prompt from `~/.claude/agents/{agent-name}.md`
|
|
191
230
|
- `-p`: Print mode — non-interactive, outputs result and exits
|
|
@@ -376,4 +415,7 @@ The orchestrator's context stays lean because:
|
|
|
376
415
|
- Both modes get identical fresh-process treatment
|
|
377
416
|
- Keep handoff files in /tmp — ephemeral, auto-cleaned on reboot
|
|
378
417
|
- Max 10 minute timeout per spawn — for heavier work, use merlin-loop
|
|
418
|
+
- `--sandbox` is optional — Docker is NOT required for normal Merlin operation
|
|
419
|
+
- In sandbox mode, the project is mounted read-only; agent output goes to /workspace-output
|
|
420
|
+
- Use `/merlin:sandbox build` to prepare the image before using --sandbox
|
|
379
421
|
</design_notes>
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: merlin:sandbox
|
|
3
|
+
description: Run agents inside a Docker sandbox — isolated, read-only project mount, resource-limited
|
|
4
|
+
argument-hint: "build | run \"task\" | shell | status"
|
|
5
|
+
allowed-tools:
|
|
6
|
+
- Read
|
|
7
|
+
- Bash
|
|
8
|
+
- Glob
|
|
9
|
+
- Grep
|
|
10
|
+
- Write
|
|
11
|
+
- AskUserQuestion
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
<objective>
|
|
15
|
+
Docker-based sandboxed execution for running agents safely.
|
|
16
|
+
|
|
17
|
+
Project is mounted read-only. Agent output goes to a writable volume at /workspace-output.
|
|
18
|
+
No network access by default. CPU and memory are capped.
|
|
19
|
+
|
|
20
|
+
Four sub-commands:
|
|
21
|
+
- `build` — Build the merlin-sandbox Docker image
|
|
22
|
+
- `run` — Run a task inside the sandbox via a fresh claude agent
|
|
23
|
+
- `shell` — Open an interactive shell inside the sandbox
|
|
24
|
+
- `status` — Check if Docker is installed and image is built
|
|
25
|
+
</objective>
|
|
26
|
+
|
|
27
|
+
<context>
|
|
28
|
+
Arguments: $ARGUMENTS
|
|
29
|
+
</context>
|
|
30
|
+
|
|
31
|
+
<process>
|
|
32
|
+
|
|
33
|
+
## Step 1: Parse Sub-Command
|
|
34
|
+
|
|
35
|
+
Extract from $ARGUMENTS:
|
|
36
|
+
- **sub-command**: First word (`build`, `run`, `shell`, `status`)
|
|
37
|
+
- **task**: Everything after the sub-command (only relevant for `run`)
|
|
38
|
+
|
|
39
|
+
If no arguments provided, show usage and exit:
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
Usage: /merlin:sandbox <sub-command> [options]
|
|
43
|
+
|
|
44
|
+
Sub-commands:
|
|
45
|
+
build Build the merlin-sandbox Docker image
|
|
46
|
+
run "task" Run a task inside the sandbox
|
|
47
|
+
shell Open an interactive shell in the sandbox
|
|
48
|
+
status Check Docker availability and image status
|
|
49
|
+
|
|
50
|
+
Examples:
|
|
51
|
+
/merlin:sandbox build
|
|
52
|
+
/merlin:sandbox run "refactor the auth module"
|
|
53
|
+
/merlin:sandbox shell
|
|
54
|
+
/merlin:sandbox status
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Step 2: Check Docker is Installed
|
|
58
|
+
|
|
59
|
+
Run this check for ALL sub-commands before proceeding:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
docker --version 2>/dev/null
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If Docker is not installed or the command fails:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
Docker is not installed or not running.
|
|
69
|
+
|
|
70
|
+
Sandboxed execution requires Docker Desktop (or Docker Engine on Linux).
|
|
71
|
+
|
|
72
|
+
Install: https://docs.docker.com/get-docker/
|
|
73
|
+
|
|
74
|
+
Normal Merlin operation does not require Docker — /merlin:sandbox is optional.
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Exit. Do not proceed with any sub-command.
|
|
78
|
+
|
|
79
|
+
## Step 3: Route to Sub-Command
|
|
80
|
+
|
|
81
|
+
### Sub-command: `status`
|
|
82
|
+
|
|
83
|
+
Check if Docker daemon is running and whether the sandbox image exists:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
docker info --format '{{.ServerVersion}}' 2>/dev/null
|
|
87
|
+
docker images merlin-sandbox:latest --format '{{.Repository}}:{{.Tag}} ({{.Size}}, created {{.CreatedSince}})' 2>/dev/null
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Display:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
════════════════════════════════════════
|
|
94
|
+
Merlin Sandbox Status
|
|
95
|
+
════════════════════════════════════════
|
|
96
|
+
|
|
97
|
+
Docker: running (version {X.Y.Z})
|
|
98
|
+
Image: {built — merlin-sandbox:latest, ~NNN MB, created N days ago}
|
|
99
|
+
OR {not built — run /merlin:sandbox build}
|
|
100
|
+
|
|
101
|
+
Config: ~/.claude/merlin/sandbox.json
|
|
102
|
+
Network: disabled (network_mode: none)
|
|
103
|
+
CPU limit: 2 cores
|
|
104
|
+
Memory limit: 4 GB
|
|
105
|
+
Mount mode: read-only
|
|
106
|
+
Output volume: merlin-output (writable at /workspace-output)
|
|
107
|
+
────────────────────────────────────────
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
If image not built, append:
|
|
111
|
+
```
|
|
112
|
+
Run `/merlin:sandbox build` to build the image (takes ~2-3 minutes).
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
### Sub-command: `build`
|
|
118
|
+
|
|
119
|
+
Locate the Dockerfile. Check in this order:
|
|
120
|
+
1. `~/.claude/merlin/docker/Dockerfile.merlin`
|
|
121
|
+
2. `.merlin/docker/Dockerfile.merlin`
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
DOCKERFILE_PATH=""
|
|
125
|
+
[ -f "$HOME/.claude/merlin/docker/Dockerfile.merlin" ] && DOCKERFILE_PATH="$HOME/.claude/merlin/docker/Dockerfile.merlin"
|
|
126
|
+
[ -z "$DOCKERFILE_PATH" ] && [ -f ".merlin/docker/Dockerfile.merlin" ] && DOCKERFILE_PATH=".merlin/docker/Dockerfile.merlin"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
If not found:
|
|
130
|
+
```
|
|
131
|
+
Dockerfile.merlin not found.
|
|
132
|
+
|
|
133
|
+
Expected at: ~/.claude/merlin/docker/Dockerfile.merlin
|
|
134
|
+
|
|
135
|
+
Try reinstalling Merlin: npx create-merlin-brain --yes
|
|
136
|
+
```
|
|
137
|
+
Exit.
|
|
138
|
+
|
|
139
|
+
Build the image:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
docker build \
|
|
143
|
+
-t merlin-sandbox:latest \
|
|
144
|
+
-f "$DOCKERFILE_PATH" \
|
|
145
|
+
"$(dirname "$DOCKERFILE_PATH")" \
|
|
146
|
+
2>&1
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Use Bash tool with `timeout: 300000` (5 minutes — image build can take a while).
|
|
150
|
+
|
|
151
|
+
On success:
|
|
152
|
+
```
|
|
153
|
+
════════════════════════════════════════
|
|
154
|
+
Sandbox image built
|
|
155
|
+
════════════════════════════════════════
|
|
156
|
+
|
|
157
|
+
Image: merlin-sandbox:latest
|
|
158
|
+
Ready for: /merlin:sandbox run "task" | /merlin:sandbox shell
|
|
159
|
+
────────────────────────────────────────
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
On failure, show the Docker build output and suggest:
|
|
163
|
+
```
|
|
164
|
+
Build failed. Common causes:
|
|
165
|
+
- No internet connection during build (npm install needs network)
|
|
166
|
+
- Docker daemon not running
|
|
167
|
+
- Insufficient disk space
|
|
168
|
+
|
|
169
|
+
Check `docker system df` for disk usage.
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
### Sub-command: `run`
|
|
175
|
+
|
|
176
|
+
Requires a task description after `run`. If missing:
|
|
177
|
+
```
|
|
178
|
+
Missing task description.
|
|
179
|
+
|
|
180
|
+
Usage: /merlin:sandbox run "task description"
|
|
181
|
+
|
|
182
|
+
Example: /merlin:sandbox run "refactor the payment module"
|
|
183
|
+
```
|
|
184
|
+
Exit.
|
|
185
|
+
|
|
186
|
+
Check if image exists:
|
|
187
|
+
```bash
|
|
188
|
+
docker images merlin-sandbox:latest -q 2>/dev/null
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
If image is not built, prompt:
|
|
192
|
+
```
|
|
193
|
+
Sandbox image not found. Build it first?
|
|
194
|
+
```
|
|
195
|
+
Use AskUserQuestion. If user confirms, run the build step first (same as `build` sub-command), then proceed.
|
|
196
|
+
|
|
197
|
+
Determine the project directory:
|
|
198
|
+
```bash
|
|
199
|
+
PROJECT_DIR=$(pwd)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Run the agent inside the sandbox:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
docker run --rm \
|
|
206
|
+
--network none \
|
|
207
|
+
--cpus="2" \
|
|
208
|
+
--memory="4g" \
|
|
209
|
+
-v "${PROJECT_DIR}:/workspace:ro" \
|
|
210
|
+
-v "merlin-output:/workspace-output" \
|
|
211
|
+
-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}" \
|
|
212
|
+
-e "MERLIN_API_KEY=${MERLIN_API_KEY:-}" \
|
|
213
|
+
merlin-sandbox:latest \
|
|
214
|
+
claude -p \
|
|
215
|
+
--permission-mode acceptEdits \
|
|
216
|
+
--output-format text \
|
|
217
|
+
-- "{task-description}" \
|
|
218
|
+
2>&1
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Use Bash tool with `timeout: 600000` (10 minutes).
|
|
222
|
+
|
|
223
|
+
After completion:
|
|
224
|
+
|
|
225
|
+
1. Check exit code — non-zero is an error; show output and suggest `/merlin:sandbox shell` for debugging.
|
|
226
|
+
|
|
227
|
+
2. Show a summary of any output files written:
|
|
228
|
+
```bash
|
|
229
|
+
docker run --rm -v "merlin-output:/workspace-output" \
|
|
230
|
+
alpine find /workspace-output -type f 2>/dev/null
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
3. Display result:
|
|
234
|
+
```
|
|
235
|
+
════════════════════════════════════════
|
|
236
|
+
Sandbox run complete
|
|
237
|
+
════════════════════════════════════════
|
|
238
|
+
|
|
239
|
+
{agent output summary}
|
|
240
|
+
|
|
241
|
+
Output files (at /workspace-output):
|
|
242
|
+
{list of files or "none"}
|
|
243
|
+
|
|
244
|
+
To copy output to current directory:
|
|
245
|
+
docker run --rm \
|
|
246
|
+
-v merlin-output:/src \
|
|
247
|
+
-v "$(pwd)":/dst \
|
|
248
|
+
alpine cp -r /src/. /dst/sandbox-output/
|
|
249
|
+
────────────────────────────────────────
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
### Sub-command: `shell`
|
|
255
|
+
|
|
256
|
+
Check if image exists (same check as `run`). Offer to build if missing.
|
|
257
|
+
|
|
258
|
+
Open an interactive shell in the sandbox:
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
docker run --rm -it \
|
|
262
|
+
--network none \
|
|
263
|
+
--cpus="2" \
|
|
264
|
+
--memory="4g" \
|
|
265
|
+
-v "$(pwd):/workspace:ro" \
|
|
266
|
+
-v "merlin-output:/workspace-output" \
|
|
267
|
+
-e "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}" \
|
|
268
|
+
-e "MERLIN_API_KEY=${MERLIN_API_KEY:-}" \
|
|
269
|
+
merlin-sandbox:latest \
|
|
270
|
+
/bin/bash 2>&1
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
Note before running:
|
|
274
|
+
```
|
|
275
|
+
Opening sandbox shell.
|
|
276
|
+
|
|
277
|
+
Project mounted read-only at: /workspace
|
|
278
|
+
Write output to: /workspace-output
|
|
279
|
+
|
|
280
|
+
Type 'exit' to leave the sandbox.
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Step 4: Error Display Format
|
|
284
|
+
|
|
285
|
+
All errors use this format:
|
|
286
|
+
|
|
287
|
+
```
|
|
288
|
+
Sandbox error: {short description}
|
|
289
|
+
|
|
290
|
+
{detail or Docker output}
|
|
291
|
+
|
|
292
|
+
{suggested fix}
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
</process>
|
|
296
|
+
|
|
297
|
+
<sandbox_notes>
|
|
298
|
+
|
|
299
|
+
## What the sandbox does
|
|
300
|
+
|
|
301
|
+
- Mounts your project read-only — the agent cannot modify your source files
|
|
302
|
+
- Gives the agent a writable volume at /workspace-output for its results
|
|
303
|
+
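- Blocks all network access (`--network none` / `network_mode: none`) — NOTE: this presumably also cuts the `claude` CLI off from the Anthropic API; confirm agent runs can complete with networking disabled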
|
|
304
|
+
- Caps CPU at 2 cores and memory at 4 GB
|
|
305
|
+
- Destroys the container after the run (--rm)
|
|
306
|
+
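- Never persists container state between runs (only files written to /workspace-output survive, because the named `merlin-output` volume is not removed by `--rm`)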
|
|
307
|
+
|
|
308
|
+
## --sandbox flag in /merlin:route
|
|
309
|
+
|
|
310
|
+
When routing agents with `/merlin:route`, pass `--sandbox` to route the task through the Docker sandbox instead of spawning a local process:
|
|
311
|
+
|
|
312
|
+
```
|
|
313
|
+
/merlin:route implementation-dev "refactor auth" --sandbox
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
This causes the orchestrator to use `docker run merlin-sandbox:latest claude --agent ...` instead of a bare `claude --agent ...` call. Requirements:
|
|
317
|
+
|
|
318
|
+
- Docker must be installed and running
|
|
319
|
+
- `merlin-sandbox:latest` image must be built (`/merlin:sandbox build`)
|
|
320
|
+
- Project is mounted read-only — the agent writes results to /workspace-output
|
|
321
|
+
- The flag is optional — normal routing does not require Docker
|
|
322
|
+
|
|
323
|
+
## When to use the sandbox
|
|
324
|
+
|
|
325
|
+
Use it when:
|
|
326
|
+
- Running untrusted or experimental agent tasks
|
|
327
|
+
- You want to guarantee the agent cannot modify source files
|
|
328
|
+
- Running automated tasks in CI where isolation matters
|
|
329
|
+
|
|
330
|
+
Do not use it for normal development work — the read-only mount means the agent
|
|
331
|
+
cannot commit changes directly to your working directory.
|
|
332
|
+
|
|
333
|
+
</sandbox_notes>
|
|
334
|
+
|
|
335
|
+
<error_handling>
|
|
336
|
+
|
|
337
|
+
| Condition | Action |
|
|
338
|
+
|-----------|--------|
|
|
339
|
+
| Docker not installed | Show install link, exit gracefully |
|
|
340
|
+
| Docker daemon not running | Tell user to start Docker, exit |
|
|
341
|
+
| Image not built | Offer to build, then proceed |
|
|
342
|
+
| Build fails | Show Docker output, suggest common fixes |
|
|
343
|
+
| Run times out (>10 min) | Warn: task too large for sandbox, suggest splitting |
|
|
344
|
+
| No ANTHROPIC_API_KEY | Warn that agent will fail without the key |
|
|
345
|
+
| No task in `run` | Show usage, exit |
|
|
346
|
+
| Unknown sub-command | Show usage, exit |
|
|
347
|
+
|
|
348
|
+
</error_handling>
|
|
349
|
+
|
|
350
|
+
<success_criteria>
|
|
351
|
+
- [ ] Docker check runs before any sub-command
|
|
352
|
+
- [ ] Graceful error when Docker is not installed (does not crash Merlin)
|
|
353
|
+
- [ ] `status` shows image existence and config
|
|
354
|
+
- [ ] `build` produces merlin-sandbox:latest
|
|
355
|
+
- [ ] `run` mounts project read-only, blocks network, caps resources
|
|
356
|
+
- [ ] `shell` opens interactive bash in the container
|
|
357
|
+
- [ ] Output files listed after a run
|
|
358
|
+
- [ ] Normal Merlin operation is never blocked by Docker absence
|
|
359
|
+
</success_criteria>
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: merlin:usage
|
|
3
|
+
description: Show session usage report — per-agent cost breakdown, token counts, and savings vs all-Opus baseline
|
|
4
|
+
allowed-tools:
|
|
5
|
+
- mcp__merlin__merlin_session_cost
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
<objective>
|
|
9
|
+
Display a formatted usage report for the current session.
|
|
10
|
+
|
|
11
|
+
Shows per-agent call counts, token usage, estimated cost, and how much was saved
|
|
12
|
+
versus routing every task to Opus. Useful for auditing spend and verifying that
|
|
13
|
+
cost-aware routing is working correctly.
|
|
14
|
+
</objective>
|
|
15
|
+
|
|
16
|
+
<process>
|
|
17
|
+
|
|
18
|
+
## Step 1: Fetch session cost data
|
|
19
|
+
|
|
20
|
+
Call `merlin_session_cost` to get the current session breakdown.
|
|
21
|
+
|
|
22
|
+
## Step 2: Display the report
|
|
23
|
+
|
|
24
|
+
If no routing calls have been recorded yet, show:
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
Usage Report
|
|
28
|
+
|
|
29
|
+
No routing calls recorded yet.
|
|
30
|
+
|
|
31
|
+
Costs are tracked automatically when merlin_route is used.
|
|
32
|
+
Run /merlin:route or delegate a task to an agent to start tracking.
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Otherwise, render the data returned by `merlin_session_cost` as-is — the tool
|
|
36
|
+
already formats a complete markdown report including:
|
|
37
|
+
|
|
38
|
+
- Per-agent table (calls, tokens in/out, estimated cost)
|
|
39
|
+
- Per-model breakdown (Haiku / Sonnet / Opus)
|
|
40
|
+
- Summary table with actual cost, Opus baseline, and savings
|
|
41
|
+
|
|
42
|
+
After displaying, add a one-line tip:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Tip: Run /merlin:usage any time during a session to check your spend.
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
</process>
|
|
49
|
+
|
|
50
|
+
<success_criteria>
|
|
51
|
+
- [ ] Calls merlin_session_cost and displays output
|
|
52
|
+
- [ ] Shows per-agent breakdown when routing calls exist
|
|
53
|
+
- [ ] Graceful message when no calls recorded
|
|
54
|
+
- [ ] Concise — output fits in a single scroll
|
|
55
|
+
</success_criteria>
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM node:20-slim
|
|
2
|
+
|
|
3
|
+
# Install essentials
|
|
4
|
+
RUN apt-get update && apt-get install -y \
|
|
5
|
+
git curl jq bash \
|
|
6
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
7
|
+
|
|
8
|
+
# Install Claude Code CLI
|
|
9
|
+
RUN npm install -g @anthropic-ai/claude-code
|
|
10
|
+
|
|
11
|
+
# Install Merlin
|
|
12
|
+
RUN npx create-merlin-brain --yes 2>/dev/null || true
|
|
13
|
+
|
|
14
|
+
# Create non-root user
|
|
15
|
+
RUN useradd -m -s /bin/bash merlin
|
|
16
|
+
USER merlin
|
|
17
|
+
WORKDIR /workspace
|
|
18
|
+
|
|
19
|
+
# Default: run Claude Code
|
|
20
|
+
# CMD rather than ENTRYPOINT: `docker run <image> /bin/sh -c …`, `… claude -p …` and `… /bin/bash`
# must replace the default command, not be appended to `claude` as arguments.
CMD ["claude"]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
version: '3.8'
|
|
2
|
+
services:
|
|
3
|
+
merlin-sandbox:
|
|
4
|
+
build:
|
|
5
|
+
context: .
|
|
6
|
+
dockerfile: Dockerfile.merlin
|
|
7
|
+
volumes:
|
|
8
|
+
- ${PROJECT_DIR:-.}:/workspace:ro
|
|
9
|
+
- merlin-output:/workspace-output
|
|
10
|
+
environment:
|
|
11
|
+
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
|
12
|
+
- MERLIN_API_KEY=${MERLIN_API_KEY}
|
|
13
|
+
network_mode: "none" # No network by default
|
|
14
|
+
deploy:
|
|
15
|
+
resources:
|
|
16
|
+
limits:
|
|
17
|
+
cpus: '2'
|
|
18
|
+
memory: 4G
|
|
19
|
+
stdin_open: true
|
|
20
|
+
tty: true
|
|
21
|
+
|
|
22
|
+
volumes:
|
|
23
|
+
merlin-output:
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Hook Template: auto-commit.sh
|
|
4
|
+
# Event: Stop
|
|
5
|
+
#
|
|
6
|
+
# Stages and commits all changes at the end of a Claude session.
|
|
7
|
+
# Uses the session summary or a generic message as the commit message.
|
|
8
|
+
#
|
|
9
|
+
# HOW TO INSTALL:
|
|
10
|
+
# Copy this file to ~/.claude/merlin/hooks/auto-commit.sh
|
|
11
|
+
# Then add to your .claude/settings.local.json:
|
|
12
|
+
#
|
|
13
|
+
# {
|
|
14
|
+
# "hooks": {
|
|
15
|
+
# "Stop": [
|
|
16
|
+
# {
|
|
17
|
+
# "hooks": [{ "type": "command", "command": "~/.claude/merlin/hooks/auto-commit.sh" }]
|
|
18
|
+
# }
|
|
19
|
+
# ]
|
|
20
|
+
# }
|
|
21
|
+
# }
|
|
22
|
+
#
|
|
23
|
+
# BEHAVIOR: Never blocks the Stop event — always prints '{}' and exits 0. Commits if there are staged/unstaged changes.
|
|
24
|
+
# WARNING: This commits automatically — only enable if you trust auto-commits.
|
|
25
|
+
# Consider using changelog-reminder.sh instead for a softer nudge.
|
|
26
|
+
#
|
|
27
|
+
set -euo pipefail
|
|
28
|
+
trap 'echo "{}"; exit 0' ERR
|
|
29
|
+
|
|
30
|
+
# Only run if we're in a git repo
|
|
31
|
+
git rev-parse --git-dir >/dev/null 2>&1 || { echo '{}'; exit 0; }
|
|
32
|
+
|
|
33
|
+
# Check for changes
|
|
34
|
+
# Untracked files count as changes too: `git add -A` below stages them, but `git diff` cannot see them.
if git diff --quiet && git diff --cached --quiet && [ -z "$(git ls-files --others --exclude-standard)" ]; then
|
|
35
|
+
echo "auto-commit: no changes to commit" >&2
|
|
36
|
+
echo '{}'
|
|
37
|
+
exit 0
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Read stop hook input for session summary
|
|
41
|
+
input=""
|
|
42
|
+
if [ ! -t 0 ]; then
|
|
43
|
+
input=$(cat 2>/dev/null || true)
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
# Try to extract stop reason or session summary from hook input
|
|
47
|
+
commit_msg="chore: auto-commit by Claude Code session"
|
|
48
|
+
if [ -n "$input" ] && command -v jq >/dev/null 2>&1; then
|
|
49
|
+
stop_reason=$(echo "$input" | jq -r '.stop_reason // empty' 2>/dev/null || true)
|
|
50
|
+
if [ -n "$stop_reason" ] && [ "$stop_reason" != "null" ]; then
|
|
51
|
+
commit_msg="chore: ${stop_reason}"
|
|
52
|
+
fi
|
|
53
|
+
fi
|
|
54
|
+
|
|
55
|
+
# Stage all changes
|
|
56
|
+
git add -A
|
|
57
|
+
|
|
58
|
+
# Commit
|
|
59
|
+
git commit -m "$commit_msg" >/dev/null 2>&1 && \
|
|
60
|
+
echo "auto-commit: committed changes — \"$commit_msg\"" >&2 || \
|
|
61
|
+
echo "auto-commit: commit failed (pre-commit hook may have blocked)" >&2
|
|
62
|
+
|
|
63
|
+
echo '{}'
|
|
64
|
+
exit 0
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Hook Template: auto-format.sh
|
|
4
|
+
# Event: PostToolUse (Write, Edit)
|
|
5
|
+
#
|
|
6
|
+
# Automatically runs Prettier and/or ESLint after Claude writes or edits a file.
|
|
7
|
+
# Keeps code style consistent without manual intervention.
|
|
8
|
+
#
|
|
9
|
+
# HOW TO INSTALL:
|
|
10
|
+
# Copy this file to ~/.claude/merlin/hooks/auto-format.sh
|
|
11
|
+
# Then add to your .claude/settings.local.json:
|
|
12
|
+
#
|
|
13
|
+
# {
|
|
14
|
+
# "hooks": {
|
|
15
|
+
# "PostToolUse": [
|
|
16
|
+
# {
|
|
17
|
+
# "matcher": "Write|Edit",
|
|
18
|
+
# "hooks": [{ "type": "command", "command": "~/.claude/merlin/hooks/auto-format.sh" }]
|
|
19
|
+
# }
|
|
20
|
+
# ]
|
|
21
|
+
# }
|
|
22
|
+
# }
|
|
23
|
+
#
|
|
24
|
+
# REQUIREMENTS: prettier and/or eslint must be installed in the project.
|
|
25
|
+
# BEHAVIOR: Advisory only — never blocks. Runs the formatters synchronously, ignores their failures, and exits 0.
|
|
26
|
+
#
|
|
27
|
+
set -euo pipefail
|
|
28
|
+
trap 'echo "{}"; exit 0' ERR
|
|
29
|
+
|
|
30
|
+
# Read tool input from stdin
|
|
31
|
+
input=""
|
|
32
|
+
if [ ! -t 0 ]; then
|
|
33
|
+
input=$(cat 2>/dev/null || true)
|
|
34
|
+
fi
|
|
35
|
+
|
|
36
|
+
[ -z "$input" ] && { echo '{}'; exit 0; }
|
|
37
|
+
|
|
38
|
+
# Extract file path
|
|
39
|
+
file_path=""
|
|
40
|
+
if command -v jq >/dev/null 2>&1; then
|
|
41
|
+
file_path=$(echo "$input" | jq -r '.tool_input.file_path // .tool_input.path // empty' 2>/dev/null || true)
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
[ -z "$file_path" ] || [ ! -f "$file_path" ] && { echo '{}'; exit 0; }
|
|
45
|
+
|
|
46
|
+
# Only format code files
|
|
47
|
+
case "$file_path" in
|
|
48
|
+
*.js|*.jsx|*.ts|*.tsx|*.mjs|*.cjs|*.json|*.css|*.scss|*.html|*.vue|*.svelte|*.md)
|
|
49
|
+
;;
|
|
50
|
+
*)
|
|
51
|
+
echo '{}'
|
|
52
|
+
exit 0
|
|
53
|
+
;;
|
|
54
|
+
esac
|
|
55
|
+
|
|
56
|
+
# Skip node_modules and generated files
|
|
57
|
+
case "$file_path" in
|
|
58
|
+
*node_modules*|*.min.js|*.bundle.js|*dist/*|*build/*)
|
|
59
|
+
echo '{}'
|
|
60
|
+
exit 0
|
|
61
|
+
;;
|
|
62
|
+
esac
|
|
63
|
+
|
|
64
|
+
# Try Prettier first (project-local, then global)
|
|
65
|
+
PRETTIER=""
|
|
66
|
+
if [ -f "node_modules/.bin/prettier" ]; then
|
|
67
|
+
PRETTIER="node_modules/.bin/prettier"
|
|
68
|
+
elif command -v prettier >/dev/null 2>&1; then
|
|
69
|
+
PRETTIER="prettier"
|
|
70
|
+
fi
|
|
71
|
+
|
|
72
|
+
if [ -n "$PRETTIER" ]; then
|
|
73
|
+
$PRETTIER --write "$file_path" >/dev/null 2>&1 || true
|
|
74
|
+
echo "auto-format: prettier applied to $file_path" >&2
|
|
75
|
+
fi
|
|
76
|
+
|
|
77
|
+
# Try ESLint for JS/TS files (fix mode)
|
|
78
|
+
case "$file_path" in
|
|
79
|
+
*.js|*.jsx|*.ts|*.tsx|*.mjs|*.cjs)
|
|
80
|
+
ESLINT=""
|
|
81
|
+
if [ -f "node_modules/.bin/eslint" ]; then
|
|
82
|
+
ESLINT="node_modules/.bin/eslint"
|
|
83
|
+
elif command -v eslint >/dev/null 2>&1; then
|
|
84
|
+
ESLINT="eslint"
|
|
85
|
+
fi
|
|
86
|
+
|
|
87
|
+
if [ -n "$ESLINT" ]; then
|
|
88
|
+
$ESLINT --fix "$file_path" >/dev/null 2>&1 || true
|
|
89
|
+
echo "auto-format: eslint --fix applied to $file_path" >&2
|
|
90
|
+
fi
|
|
91
|
+
;;
|
|
92
|
+
esac
|
|
93
|
+
|
|
94
|
+
echo '{}'
|
|
95
|
+
exit 0
|