@ai-hero/sandcastle 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/README.md +384 -58
  2. package/dist/AgentProvider.d.ts +22 -12
  3. package/dist/AgentProvider.d.ts.map +1 -1
  4. package/dist/AgentProvider.js +46 -47
  5. package/dist/AgentProvider.js.map +1 -1
  6. package/dist/DockerLifecycle.d.ts +5 -1
  7. package/dist/DockerLifecycle.d.ts.map +1 -1
  8. package/dist/DockerLifecycle.js +8 -1
  9. package/dist/DockerLifecycle.js.map +1 -1
  10. package/dist/InitService.d.ts.map +1 -1
  11. package/dist/InitService.js +57 -6
  12. package/dist/InitService.js.map +1 -1
  13. package/dist/MountConfig.d.ts +15 -0
  14. package/dist/MountConfig.d.ts.map +1 -0
  15. package/dist/MountConfig.js +7 -0
  16. package/dist/MountConfig.js.map +1 -0
  17. package/dist/Orchestrator.d.ts +0 -1
  18. package/dist/Orchestrator.d.ts.map +1 -1
  19. package/dist/Orchestrator.js +28 -29
  20. package/dist/Orchestrator.js.map +1 -1
  21. package/dist/SandboxFactory.d.ts +21 -17
  22. package/dist/SandboxFactory.d.ts.map +1 -1
  23. package/dist/SandboxFactory.js +48 -50
  24. package/dist/SandboxFactory.js.map +1 -1
  25. package/dist/SandboxLifecycle.d.ts +1 -1
  26. package/dist/SandboxLifecycle.d.ts.map +1 -1
  27. package/dist/SandboxLifecycle.js +2 -2
  28. package/dist/SandboxLifecycle.js.map +1 -1
  29. package/dist/SandboxProvider.d.ts +50 -13
  30. package/dist/SandboxProvider.d.ts.map +1 -1
  31. package/dist/SandboxProvider.js +2 -0
  32. package/dist/SandboxProvider.js.map +1 -1
  33. package/dist/TextDeltaBuffer.d.ts +24 -0
  34. package/dist/TextDeltaBuffer.d.ts.map +1 -0
  35. package/dist/TextDeltaBuffer.js +68 -0
  36. package/dist/TextDeltaBuffer.js.map +1 -0
  37. package/dist/WorktreeManager.d.ts +2 -0
  38. package/dist/WorktreeManager.d.ts.map +1 -1
  39. package/dist/WorktreeManager.js +3 -0
  40. package/dist/WorktreeManager.js.map +1 -1
  41. package/dist/cli.d.ts.map +1 -1
  42. package/dist/cli.js +11 -6
  43. package/dist/cli.js.map +1 -1
  44. package/dist/createSandbox.d.ts +6 -5
  45. package/dist/createSandbox.d.ts.map +1 -1
  46. package/dist/createSandbox.js +14 -6
  47. package/dist/createSandbox.js.map +1 -1
  48. package/dist/index.d.ts +5 -4
  49. package/dist/index.d.ts.map +1 -1
  50. package/dist/index.js +1 -1
  51. package/dist/index.js.map +1 -1
  52. package/dist/mergeProviderEnv.d.ts +13 -0
  53. package/dist/mergeProviderEnv.d.ts.map +1 -0
  54. package/dist/mergeProviderEnv.js +23 -0
  55. package/dist/mergeProviderEnv.js.map +1 -0
  56. package/dist/run.d.ts +7 -18
  57. package/dist/run.d.ts.map +1 -1
  58. package/dist/run.js +35 -22
  59. package/dist/run.js.map +1 -1
  60. package/dist/sandboxes/daytona.d.ts +48 -0
  61. package/dist/sandboxes/daytona.d.ts.map +1 -0
  62. package/dist/sandboxes/daytona.js +125 -0
  63. package/dist/sandboxes/daytona.js.map +1 -0
  64. package/dist/sandboxes/docker.d.ts +10 -0
  65. package/dist/sandboxes/docker.d.ts.map +1 -1
  66. package/dist/sandboxes/docker.js +69 -42
  67. package/dist/sandboxes/docker.js.map +1 -1
  68. package/dist/sandboxes/podman.d.ts +46 -0
  69. package/dist/sandboxes/podman.d.ts.map +1 -0
  70. package/dist/sandboxes/podman.js +195 -0
  71. package/dist/sandboxes/podman.js.map +1 -0
  72. package/dist/sandboxes/test-isolated.d.ts +1 -1
  73. package/dist/sandboxes/test-isolated.d.ts.map +1 -1
  74. package/dist/sandboxes/test-isolated.js +56 -45
  75. package/dist/sandboxes/test-isolated.js.map +1 -1
  76. package/dist/sandboxes/vercel.d.ts +92 -0
  77. package/dist/sandboxes/vercel.d.ts.map +1 -0
  78. package/dist/sandboxes/vercel.js +165 -0
  79. package/dist/sandboxes/vercel.js.map +1 -0
  80. package/dist/syncIn.d.ts +4 -2
  81. package/dist/syncIn.d.ts.map +1 -1
  82. package/dist/syncIn.js +72 -22
  83. package/dist/syncIn.js.map +1 -1
  84. package/dist/syncOut.d.ts +4 -2
  85. package/dist/syncOut.d.ts.map +1 -1
  86. package/dist/syncOut.js +156 -77
  87. package/dist/syncOut.js.map +1 -1
  88. package/dist/templates/blank/.env.example +1 -0
  89. package/dist/templates/parallel-planner/.env.example +1 -0
  90. package/dist/templates/parallel-planner/main.mts +3 -3
  91. package/dist/templates/parallel-planner-with-review/.env.example +5 -0
  92. package/dist/templates/parallel-planner-with-review/CODING_STANDARDS.md +27 -0
  93. package/dist/templates/parallel-planner-with-review/implement-prompt.md +62 -0
  94. package/dist/templates/parallel-planner-with-review/main.mts +249 -0
  95. package/dist/templates/parallel-planner-with-review/merge-prompt.md +22 -0
  96. package/dist/templates/parallel-planner-with-review/plan-prompt.md +33 -0
  97. package/dist/templates/parallel-planner-with-review/review-prompt.md +55 -0
  98. package/dist/templates/parallel-planner-with-review/template.json +4 -0
  99. package/dist/templates/sequential-reviewer/.env.example +1 -0
  100. package/dist/templates/sequential-reviewer/CODING_STANDARDS.md +27 -0
  101. package/dist/templates/sequential-reviewer/implement-prompt.md +34 -45
  102. package/dist/templates/sequential-reviewer/main.mts +2 -2
  103. package/dist/templates/sequential-reviewer/review-prompt.md +1 -1
  104. package/dist/templates/simple-loop/.env.example +1 -0
  105. package/dist/testSandbox.d.ts.map +1 -1
  106. package/dist/testSandbox.js +58 -53
  107. package/dist/testSandbox.js.map +1 -1
  108. package/package.json +25 -1
@@ -0,0 +1,62 @@
1
+ # TASK
2
+
3
+ Fix issue #{{ISSUE_NUMBER}}: {{ISSUE_TITLE}}
4
+
5
+ Pull in the issue using `gh issue view`. If it has a parent PRD, pull that in too.
6
+
7
+ Only work on the issue specified.
8
+
9
+ Work on branch {{BRANCH}}. Make commits and run tests. Do not close the issue yourself.
10
+
11
+ # CONTEXT
12
+
13
+ Here are the last 10 commits:
14
+
15
+ <recent-commits>
16
+
17
+ !`git log -n 10 --format="%H%n%ad%n%B---" --date=short`
18
+
19
+ </recent-commits>
20
+
21
+ # EXPLORATION
22
+
23
+ Explore the repo and fill your context window with relevant information that will allow you to complete the task.
24
+
25
+ Pay extra attention to test files that touch the relevant parts of the code.
26
+
27
+ # EXECUTION
28
+
29
+ If applicable, use RGR to complete the task.
30
+
31
+ 1. RED: write one test
32
+ 2. GREEN: write the implementation to pass that test
33
+ 3. REPEAT until done
34
+ 4. REFACTOR the code
35
+
36
+ # FEEDBACK LOOPS
37
+
38
+ Before committing, run `npm run typecheck` and `npm run test` to ensure the tests pass.
39
+
40
+ # COMMIT
41
+
42
+ Make a git commit. The commit message must:
43
+
44
+ 1. Start with `RALPH:` prefix
45
+ 2. Include task completed + PRD reference
46
+ 3. Key decisions made
47
+ 4. Files changed
48
+ 5. Blockers or notes for next iteration
49
+
50
+ Keep it concise.
51
+
52
+ # THE ISSUE
53
+
54
+ If the task is not complete, leave a comment on the GitHub issue with what was done.
55
+
56
+ Do not close the issue - this will be done later.
57
+
58
+ Once complete, output <promise>COMPLETE</promise>.
59
+
60
+ # FINAL RULES
61
+
62
+ ONLY WORK ON A SINGLE TASK.
@@ -0,0 +1,249 @@
1
+ // Parallel Planner with Review — three-phase orchestration loop
2
+ //
3
+ // This template drives a multi-phase workflow:
4
+ // Phase 1 (Plan): An opus agent analyzes open issues, builds a
5
+ // dependency graph, and outputs a <plan> JSON
6
+ // listing unblocked issues with branch names.
7
+ // Phase 2 (Execute + Review): For each issue, a sandbox is created via
8
+ // createSandbox(). The implementer runs first
9
+ // (100 iterations). If it produces commits, a
10
+ // reviewer runs in the same sandbox on the same
11
+ // branch (1 iteration). All issue pipelines run
12
+ // concurrently via Promise.allSettled().
13
+ // Phase 3 (Merge): A single agent merges all completed branches
14
+ // into the current branch.
15
+ //
16
+ // The outer loop repeats up to MAX_ITERATIONS times so that newly unblocked
17
+ // issues are picked up after each round of merges.
18
+ //
19
+ // Usage:
20
+ // npx tsx .sandcastle/main.mts
21
+ // Or add to package.json:
22
+ // "scripts": { "sandcastle": "npx tsx .sandcastle/main.mts" }
23
+
24
+ import * as sandcastle from "@ai-hero/sandcastle";
25
+ import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Configuration
29
+ // ---------------------------------------------------------------------------
30
+
31
+ // Maximum number of plan→execute→merge cycles before stopping.
32
+ // Raise this if your backlog is large; lower it for a quick smoke-test run.
33
+ const MAX_ITERATIONS = 10;
34
+
35
+ // Hooks run inside the sandbox before the agent starts each iteration.
36
+ // npm install ensures the sandbox always has fresh dependencies.
37
+ const hooks = {
38
+ onSandboxReady: [{ command: "npm install" }],
39
+ };
40
+
41
+ // Copy node_modules from the host into the worktree before each sandbox
42
+ // starts. Avoids a full npm install from scratch; the hook above handles
43
+ // platform-specific binaries and any packages added since the last copy.
44
+ const copyToSandbox = ["node_modules"];
45
+
46
+ // Cap the number of concurrent sandboxes to avoid resource exhaustion.
47
+ const MAX_CONCURRENCY = 4;
48
+
49
+ // ---------------------------------------------------------------------------
50
+ // Main loop
51
+ // ---------------------------------------------------------------------------
52
+
53
+ for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
54
+ console.log(`\n=== Iteration ${iteration}/${MAX_ITERATIONS} ===\n`);
55
+
56
+ // -------------------------------------------------------------------------
57
+ // Phase 1: Plan
58
+ //
59
+ // The planning agent (opus, for deeper reasoning) reads the open issue list,
60
+ // builds a dependency graph, and selects the issues that can be worked in
61
+ // parallel right now (i.e., no blocking dependencies on other open issues).
62
+ //
63
+ // It outputs a <plan> JSON block — we parse that to drive Phase 2.
64
+ // -------------------------------------------------------------------------
65
+ const plan = await sandcastle.run({
66
+ hooks,
67
+ copyToSandbox,
68
+ sandbox: docker(),
69
+ name: "planner",
70
+ // One iteration is enough: the planner just needs to read and reason,
71
+ // not write code.
72
+ maxIterations: 1,
73
+ // Opus for planning: dependency analysis benefits from deeper reasoning.
74
+ agent: sandcastle.claudeCode("claude-opus-4-6"),
75
+ promptFile: "./.sandcastle/plan-prompt.md",
76
+ });
77
+
78
+ // Extract the <plan>…</plan> block from the agent's stdout.
79
+ const planMatch = plan.stdout.match(/<plan>([\s\S]*?)<\/plan>/);
80
+ if (!planMatch) {
81
+ throw new Error(
82
+ "Planning agent did not produce a <plan> tag.\n\n" + plan.stdout,
83
+ );
84
+ }
85
+
86
+ // The plan JSON contains an array of issues, each with number, title, branch.
87
+ const { issues } = JSON.parse(planMatch[1]!) as {
88
+ issues: { number: number; title: string; branch: string }[];
89
+ };
90
+
91
+ if (issues.length === 0) {
92
+ // No unblocked work — either everything is done or everything is blocked.
93
+ console.log("No unblocked issues to work on. Exiting.");
94
+ break;
95
+ }
96
+
97
+ console.log(
98
+ `Planning complete. ${issues.length} issue(s) to work in parallel:`,
99
+ );
100
+ for (const issue of issues) {
101
+ console.log(` #${issue.number}: ${issue.title} → ${issue.branch}`);
102
+ }
103
+
104
+ // -------------------------------------------------------------------------
105
+ // Phase 2: Execute + Review
106
+ //
107
+ // For each issue, create a sandbox via createSandbox() so the implementer
108
+ // and reviewer share the same sandbox instance per branch. The implementer
109
+ // runs first; if it produces commits, the reviewer runs in the same sandbox.
110
+ //
111
+ // A semaphore limits concurrency to MAX_CONCURRENCY sandboxes at once.
112
+ // Promise.allSettled means one failing pipeline doesn't cancel the others.
113
+ // -------------------------------------------------------------------------
114
+
115
+ // Simple semaphore for concurrency limiting
116
+ let running = 0;
117
+ const waiting: Array<() => void> = [];
118
+ const acquire = (): Promise<void> => {
119
+ if (running < MAX_CONCURRENCY) {
120
+ running++;
121
+ return Promise.resolve();
122
+ }
123
+ return new Promise<void>((resolve) => {
124
+ waiting.push(() => {
125
+ running++;
126
+ resolve();
127
+ });
128
+ });
129
+ };
130
+ const release = () => {
131
+ running--;
132
+ const next = waiting.shift();
133
+ if (next) next();
134
+ };
135
+
136
+ const settled = await Promise.allSettled(
137
+ issues.map(async (issue) => {
138
+ await acquire();
139
+ try {
140
+ const sandbox = await sandcastle.createSandbox({
141
+ branch: issue.branch,
142
+ sandbox: docker(),
143
+ hooks,
144
+ copyToSandbox,
145
+ });
146
+
147
+ try {
148
+ // Run the implementer
149
+ const implement = await sandbox.run({
150
+ name: "implementer",
151
+ maxIterations: 100,
152
+ agent: sandcastle.claudeCode("claude-sonnet-4-6"),
153
+ promptFile: "./.sandcastle/implement-prompt.md",
154
+ promptArgs: {
155
+ ISSUE_NUMBER: String(issue.number),
156
+ ISSUE_TITLE: issue.title,
157
+ BRANCH: issue.branch,
158
+ },
159
+ });
160
+
161
+ // Only review if the implementer produced commits
162
+ if (implement.commits.length > 0) {
163
+ await sandbox.run({
164
+ name: "reviewer",
165
+ maxIterations: 1,
166
+ agent: sandcastle.claudeCode("claude-sonnet-4-6"),
167
+ promptFile: "./.sandcastle/review-prompt.md",
168
+ promptArgs: {
169
+ BRANCH: issue.branch,
170
+ },
171
+ });
172
+ }
173
+
174
+ return implement;
175
+ } finally {
176
+ await sandbox.close();
177
+ }
178
+ } finally {
179
+ release();
180
+ }
181
+ }),
182
+ );
183
+
184
+ // Log any agents that threw (network error, sandbox crash, etc.).
185
+ for (const [i, outcome] of settled.entries()) {
186
+ if (outcome.status === "rejected") {
187
+ console.error(
188
+ ` ✗ #${issues[i]!.number} (${issues[i]!.branch}) failed: ${outcome.reason}`,
189
+ );
190
+ }
191
+ }
192
+
193
+ // Only pass branches that actually produced commits to the merge phase.
194
+ // An agent that ran successfully but made no commits has nothing to merge.
195
+ const completedIssues = settled
196
+ .map((outcome, i) => ({ outcome, issue: issues[i]! }))
197
+ .filter(
198
+ (entry) =>
199
+ entry.outcome.status === "fulfilled" &&
200
+ entry.outcome.value.commits.length > 0,
201
+ )
202
+ .map((entry) => entry.issue);
203
+
204
+ const completedBranches = completedIssues.map((i) => i.branch);
205
+
206
+ console.log(
207
+ `\nExecution complete. ${completedBranches.length} branch(es) with commits:`,
208
+ );
209
+ for (const branch of completedBranches) {
210
+ console.log(` ${branch}`);
211
+ }
212
+
213
+ if (completedBranches.length === 0) {
214
+ // All agents ran but none made commits — nothing to merge this cycle.
215
+ console.log("No commits produced. Nothing to merge.");
216
+ continue;
217
+ }
218
+
219
+ // -------------------------------------------------------------------------
220
+ // Phase 3: Merge
221
+ //
222
+ // One agent merges all completed branches into the current branch,
223
+ // resolving any conflicts and running tests to confirm everything works.
224
+ //
225
+ // The {{BRANCHES}} and {{ISSUES}} prompt arguments are lists that the agent
226
+ // uses to know which branches to merge and which issues to close.
227
+ // -------------------------------------------------------------------------
228
+ await sandcastle.run({
229
+ hooks,
230
+ copyToSandbox,
231
+ sandbox: docker(),
232
+ name: "merger",
233
+ maxIterations: 1,
234
+ agent: sandcastle.claudeCode("claude-sonnet-4-6"),
235
+ promptFile: "./.sandcastle/merge-prompt.md",
236
+ promptArgs: {
237
+ // A markdown list of branch names, one per line.
238
+ BRANCHES: completedBranches.map((b) => `- ${b}`).join("\n"),
239
+ // A markdown list of issue numbers and titles, one per line.
240
+ ISSUES: completedIssues
241
+ .map((i) => `- #${i.number}: ${i.title}`)
242
+ .join("\n"),
243
+ },
244
+ });
245
+
246
+ console.log("\nBranches merged.");
247
+ }
248
+
249
+ console.log("\nAll done.");
@@ -0,0 +1,22 @@
1
+ # TASK
2
+
3
+ Merge the following branches into the current branch:
4
+
5
+ {{BRANCHES}}
6
+
7
+ For each branch:
8
+
9
+ 1. Run `git merge <branch> --no-edit`
10
+ 2. If there are merge conflicts, resolve them intelligently by reading both sides and choosing the correct resolution
11
+ 3. After resolving conflicts, run `npm run typecheck` and `npm run test` to verify everything works
12
+ 4. If tests fail, fix the issues before proceeding to the next branch
13
+
14
+ After all branches are merged, make a single commit summarizing the merge.
15
+
16
+ # CLOSE ISSUES
17
+
18
+ For each branch that was merged, close its issue. Here are all the issues:
19
+
20
+ {{ISSUES}}
21
+
22
+ Once you've merged everything you can, output <promise>COMPLETE</promise>.
@@ -0,0 +1,33 @@
1
+ # ISSUES
2
+
3
+ Here are the open issues in the repo:
4
+
5
+ <issues-json>
6
+
7
+ !`gh issue list --state open --label Sandcastle --json number,title,body,labels,comments --jq '[.[] | {number, title, body, labels: [.labels[].name], comments: [.comments[].body]}]'`
8
+
9
+ </issues-json>
10
+
11
+ # TASK
12
+
13
+ Analyze the open issues and build a dependency graph. For each issue, determine whether it **blocks** or **is blocked by** any other open issue.
14
+
15
+ An issue B is **blocked by** issue A if:
16
+
17
+ - B requires code or infrastructure that A introduces
18
+ - B and A modify overlapping files or modules, making concurrent work likely to produce merge conflicts
19
+ - B's requirements depend on a decision or API shape that A will establish
20
+
21
+ An issue is **unblocked** if it has zero blocking dependencies on other open issues.
22
+
23
+ For each unblocked issue, assign a branch name using the format `sandcastle/issue-{number}-{slug}`.
24
+
25
+ # OUTPUT
26
+
27
+ Output your plan as a JSON object wrapped in `<plan>` tags:
28
+
29
+ <plan>
30
+ {"issues": [{"number": 42, "title": "Fix auth bug", "branch": "sandcastle/issue-42-fix-auth-bug"}]}
31
+ </plan>
32
+
33
+ Include only unblocked issues. If every issue is blocked, include the single highest-priority candidate (the one with the fewest or weakest dependencies).
@@ -0,0 +1,55 @@
1
+ # TASK
2
+
3
+ Review the code changes on branch `{{BRANCH}}` and improve code clarity, consistency, and maintainability while preserving exact functionality.
4
+
5
+ # CONTEXT
6
+
7
+ ## Branch diff
8
+
9
+ !`git diff main...{{BRANCH}}`
10
+
11
+ ## Commits on this branch
12
+
13
+ !`git log main..{{BRANCH}} --oneline`
14
+
15
+ # REVIEW PROCESS
16
+
17
+ 1. **Understand the change**: Read the diff and commits above to understand the intent.
18
+
19
+ 2. **Analyze for improvements**: Look for opportunities to:
20
+ - Reduce unnecessary complexity and nesting
21
+ - Eliminate redundant code and abstractions
22
+ - Improve readability through clear variable and function names
23
+ - Consolidate related logic
24
+ - Remove unnecessary comments that describe obvious code
25
+ - Avoid nested ternary operators - prefer switch statements or if/else chains
26
+ - Choose clarity over brevity - explicit code is often better than overly compact code
27
+
28
+ 3. **Check correctness**:
29
+ - Does the implementation match the intent? Are edge cases handled?
30
+ - Are new/changed behaviours covered by tests?
31
+ - Are there unsafe casts, `any` types, or unchecked assumptions?
32
+ - Does the change introduce injection vulnerabilities, credential leaks, or other security issues?
33
+
34
+ 4. **Maintain balance**: Avoid over-simplification that could:
35
+ - Reduce code clarity or maintainability
36
+ - Create overly clever solutions that are hard to understand
37
+ - Combine too many concerns into single functions or components
38
+ - Remove helpful abstractions that improve code organization
39
+ - Make the code harder to debug or extend
40
+
41
+ 5. **Apply project standards**: Follow the coding standards defined in @.sandcastle/CODING_STANDARDS.md
42
+
43
+ 6. **Preserve functionality**: Never change what the code does - only how it does it. All original features, outputs, and behaviors must remain intact.
44
+
45
+ # EXECUTION
46
+
47
+ If you find improvements to make:
48
+
49
+ 1. Make the changes directly on this branch
50
+ 2. Run tests and type checking to ensure nothing is broken
51
+ 3. Commit describing the refinements
52
+
53
+ If the code is already clean and well-structured, do nothing.
54
+
55
+ Once complete, output <promise>COMPLETE</promise>.
@@ -0,0 +1,4 @@
1
+ {
2
+ "name": "parallel-planner-with-review",
3
+ "description": "Plans parallelizable issues, executes with per-branch review, merges"
4
+ }
@@ -1,4 +1,5 @@
1
1
  # Anthropic API key
2
+ # If you want to use your Claude subscription instead of an API key, see https://github.com/mattpocock/sandcastle/issues/191
2
3
  ANTHROPIC_API_KEY=
3
4
  # GitHub personal access token
4
5
  GH_TOKEN=
@@ -0,0 +1,27 @@
1
+ # Coding Standards
2
+
3
+ <!-- Customize this file with your project's coding standards.
4
+ The reviewer agent loads it during code review via @.sandcastle/CODING_STANDARDS.md
5
+ so these standards are enforced during review without costing tokens during implementation. -->
6
+
7
+ ## Style
8
+
9
+ <!-- Example:
10
+ - Use camelCase for variables and functions
11
+ - Use PascalCase for classes and types
12
+ - Prefer named exports over default exports
13
+ -->
14
+
15
+ ## Testing
16
+
17
+ <!-- Example:
18
+ - Every public function must have at least one test
19
+ - Use descriptive test names that explain the expected behavior
20
+ -->
21
+
22
+ ## Architecture
23
+
24
+ <!-- Example:
25
+ - Keep modules focused on a single responsibility
26
+ - Prefer composition over inheritance
27
+ -->
@@ -1,62 +1,51 @@
1
- # TASK
1
+ # Context
2
2
 
3
- Fix issue #{{ISSUE_NUMBER}}: {{ISSUE_TITLE}}
3
+ ## Open issues
4
4
 
5
- Pull in the issue using `gh issue view`. If it has a parent PRD, pull that in too.
5
+ !`gh issue list --label Sandcastle --json number,title,body --limit 20`
6
6
 
7
- Only work on the issue specified.
7
+ ## Recent RALPH commits (last 10)
8
8
 
9
- Work on branch {{BRANCH}}. Make commits, run tests, and close the issue when done.
9
+ !`git log --oneline --grep="RALPH" -10`
10
10
 
11
- # CONTEXT
11
+ # Task
12
12
 
13
- Here are the last 10 commits:
13
+ You are RALPH, an autonomous coding agent working through GitHub issues one at a time.
14
14
 
15
- <recent-commits>
15
+ ## Priority order
16
16
 
17
- !`git log -n 10 --format="%H%n%ad%n%B---" --date=short`
17
+ Work on issues in this order:
18
18
 
19
- </recent-commits>
19
+ 1. **Bug fixes** — broken behaviour affecting users
20
+ 2. **Tracer bullets** — thin end-to-end slices that prove an approach works
21
+ 3. **Polish** — improving existing functionality (error messages, UX, docs)
22
+ 4. **Refactors** — internal cleanups with no user-visible change
20
23
 
21
- # EXPLORATION
24
+ Pick the highest-priority open issue that is not blocked by another open issue.
22
25
 
23
- Explore the repo and fill your context window with relevant information that will allow you to complete the task.
26
+ ## Workflow
24
27
 
25
- Pay extra attention to test files that touch the relevant parts of the code.
28
+ 1. **Explore** — read the issue carefully. Pull in the parent PRD if referenced. Read the relevant source files and tests before writing any code.
29
+ 2. **Plan** — decide what to change and why. Keep the change as small as possible.
30
+ 3. **Execute** — use RGR (Red → Green → Repeat → Refactor): write a failing test first, then write the implementation to pass it.
31
+ 4. **Verify** — run `npm run typecheck` and `npm run test` before committing. Fix any failures before proceeding.
32
+ 5. **Commit** — make a single git commit. The message MUST:
33
+ - Start with `RALPH:` prefix
34
+ - Include the task completed and any PRD reference
35
+ - List key decisions made
36
+ - List files changed
37
+ - Note any blockers for the next iteration
38
+ 6. **Close** — close the issue with `gh issue close <number> --comment "..."` explaining what was done.
26
39
 
27
- # EXECUTION
40
+ ## Rules
28
41
 
29
- If applicable, use RGR to complete the task.
42
+ - Work on **one issue per iteration**. Do not attempt multiple issues in a single iteration.
43
+ - Do not close an issue until you have committed the fix and verified tests pass.
44
+ - Do not leave commented-out code or TODO comments in committed code.
45
+ - If you are blocked (missing context, failing tests you cannot fix, external dependency), leave a comment on the issue and move on — do not close it.
30
46
 
31
- 1. RED: write one test
32
- 2. GREEN: write the implementation to pass that test
33
- 3. REPEAT until done
34
- 4. REFACTOR the code
47
+ # Done
35
48
 
36
- # FEEDBACK LOOPS
49
+ When all actionable issues are complete (or you are blocked on all remaining ones), output the completion signal:
37
50
 
38
- Before committing, run `npm run typecheck` and `npm run test` to ensure the tests pass.
39
-
40
- # COMMIT
41
-
42
- Make a git commit. The commit message must:
43
-
44
- 1. Start with `RALPH:` prefix
45
- 2. Include task completed + PRD reference
46
- 3. Key decisions made
47
- 4. Files changed
48
- 5. Blockers or notes for next iteration
49
-
50
- Keep it concise.
51
-
52
- # THE ISSUE
53
-
54
- If the task is not complete, leave a comment on the GitHub issue with what was done.
55
-
56
- Do not close the issue - this will be done later.
57
-
58
- Once complete, output <promise>COMPLETE</promise>.
59
-
60
- # FINAL RULES
61
-
62
- ONLY WORK ON A SINGLE TASK.
51
+ <promise>COMPLETE</promise>
@@ -87,8 +87,9 @@ for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
87
87
  hooks,
88
88
  copyToSandbox,
89
89
  sandbox: docker(),
90
+ branchStrategy: { type: "branch", branch },
90
91
  name: "reviewer",
91
- maxIterations: 10,
92
+ maxIterations: 1,
92
93
  agent: sandcastle.claudeCode("claude-sonnet-4-6"),
93
94
  promptFile: "./.sandcastle/review-prompt.md",
94
95
  // Prompt arguments substitute {{BRANCH}} in review-prompt.md before the
@@ -96,7 +97,6 @@ for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
96
97
  promptArgs: {
97
98
  BRANCH: branch,
98
99
  },
99
- worktree: { mode: "branch", branch },
100
100
  });
101
101
 
102
102
  console.log("\nReview complete.");
@@ -38,7 +38,7 @@ Review the code changes on branch `{{BRANCH}}` and improve code clarity, consist
38
38
  - Remove helpful abstractions that improve code organization
39
39
  - Make the code harder to debug or extend
40
40
 
41
- 5. **Apply project standards**: Follow the established coding standards in the project.
41
+ 5. **Apply project standards**: Follow the coding standards defined in @.sandcastle/CODING_STANDARDS.md
42
42
 
43
43
  6. **Preserve functionality**: Never change what the code does - only how it does it. All original features, outputs, and behaviors must remain intact.
44
44
 
@@ -1,4 +1,5 @@
1
1
  # Anthropic API key
2
+ # If you want to use your Claude subscription instead of an API key, see https://github.com/mattpocock/sandcastle/issues/191
2
3
  ANTHROPIC_API_KEY=
3
4
  # GitHub personal access token
4
5
  GH_TOKEN=
@@ -1 +1 @@
1
- {"version":3,"file":"testSandbox.d.ts","sourceRoot":"","sources":["../src/testSandbox.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAU,KAAK,EAAE,MAAM,QAAQ,CAAC;AAQvC,OAAO,EAAmB,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAa/D,eAAO,MAAM,qBAAqB,4DA8GjC,CAAC"}
1
+ {"version":3,"file":"testSandbox.d.ts","sourceRoot":"","sources":["../src/testSandbox.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAU,KAAK,EAAE,MAAM,QAAQ,CAAC;AAQvC,OAAO,EAAmB,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAa/D,eAAO,MAAM,qBAAqB,4DAiHjC,CAAC"}