@rockclaver/sandcastle 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1355 -0
- package/dist/MountConfig-CmXclHA5.d.ts +26 -0
- package/dist/SandboxProvider-EkSMuBp8.d.ts +243 -0
- package/dist/chunk-72UVAC7B.js +99 -0
- package/dist/chunk-72UVAC7B.js.map +1 -0
- package/dist/chunk-BIWNFKGV.js +22 -0
- package/dist/chunk-BIWNFKGV.js.map +1 -0
- package/dist/chunk-FKX3DRTL.js +362 -0
- package/dist/chunk-FKX3DRTL.js.map +1 -0
- package/dist/chunk-NGBM7T3E.js +76 -0
- package/dist/chunk-NGBM7T3E.js.map +1 -0
- package/dist/chunk-QCLZLPJ7.js +26431 -0
- package/dist/chunk-QCLZLPJ7.js.map +1 -0
- package/dist/chunk-VAKEM3U2.js +26997 -0
- package/dist/chunk-VAKEM3U2.js.map +1 -0
- package/dist/index.d.ts +943 -0
- package/dist/index.js +2393 -0
- package/dist/index.js.map +1 -0
- package/dist/main.d.ts +1 -0
- package/dist/main.js +19268 -0
- package/dist/main.js.map +1 -0
- package/dist/mountUtils-CCA-bbpK.d.ts +25 -0
- package/dist/sandboxes/daytona.d.ts +60 -0
- package/dist/sandboxes/daytona.js +122 -0
- package/dist/sandboxes/daytona.js.map +1 -0
- package/dist/sandboxes/docker.d.ts +110 -0
- package/dist/sandboxes/docker.js +9 -0
- package/dist/sandboxes/docker.js.map +1 -0
- package/dist/sandboxes/no-sandbox.d.ts +38 -0
- package/dist/sandboxes/no-sandbox.js +7 -0
- package/dist/sandboxes/no-sandbox.js.map +1 -0
- package/dist/sandboxes/podman.d.ts +124 -0
- package/dist/sandboxes/podman.js +299 -0
- package/dist/sandboxes/podman.js.map +1 -0
- package/dist/sandboxes/vercel.d.ts +104 -0
- package/dist/sandboxes/vercel.js +148 -0
- package/dist/sandboxes/vercel.js.map +1 -0
- package/dist/templates/blank/main.mts +14 -0
- package/dist/templates/blank/prompt.md +12 -0
- package/dist/templates/blank/template.json +4 -0
- package/dist/templates/parallel-planner/implement-prompt.md +62 -0
- package/dist/templates/parallel-planner/main.mts +204 -0
- package/dist/templates/parallel-planner/merge-prompt.md +26 -0
- package/dist/templates/parallel-planner/plan-prompt.md +37 -0
- package/dist/templates/parallel-planner/template.json +4 -0
- package/dist/templates/parallel-planner-with-review/CODING_STANDARDS.md +27 -0
- package/dist/templates/parallel-planner-with-review/implement-prompt.md +62 -0
- package/dist/templates/parallel-planner-with-review/main.mts +226 -0
- package/dist/templates/parallel-planner-with-review/merge-prompt.md +26 -0
- package/dist/templates/parallel-planner-with-review/plan-prompt.md +37 -0
- package/dist/templates/parallel-planner-with-review/review-prompt.md +55 -0
- package/dist/templates/parallel-planner-with-review/template.json +4 -0
- package/dist/templates/sequential-reviewer/CODING_STANDARDS.md +27 -0
- package/dist/templates/sequential-reviewer/implement-prompt.md +53 -0
- package/dist/templates/sequential-reviewer/main.mts +119 -0
- package/dist/templates/sequential-reviewer/review-prompt.md +55 -0
- package/dist/templates/sequential-reviewer/template.json +4 -0
- package/dist/templates/simple-loop/main.mts +49 -0
- package/dist/templates/simple-loop/prompt.md +53 -0
- package/dist/templates/simple-loop/template.json +4 -0
- package/package.json +104 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Context
|
|
2
|
+
|
|
3
|
+
<!-- Use !`command` to pull in dynamic context. Commands run inside the sandbox. -->
|
|
4
|
+
<!-- Example: !`git log --oneline -10` or !`{{LIST_TASKS_COMMAND}}` -->
|
|
5
|
+
|
|
6
|
+
# Task
|
|
7
|
+
|
|
8
|
+
<!-- Describe what the agent should do. -->
|
|
9
|
+
|
|
10
|
+
# Done
|
|
11
|
+
|
|
12
|
+
<!-- When the task is complete, output <promise>COMPLETE</promise> to signal early termination. -->
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Fix issue {{TASK_ID}}: {{ISSUE_TITLE}}
|
|
4
|
+
|
|
5
|
+
Pull in the issue using `{{VIEW_TASK_COMMAND}}`. If it has a parent PRD, pull that in too.
|
|
6
|
+
|
|
7
|
+
Only work on the issue specified.
|
|
8
|
+
|
|
9
|
+
Work on branch {{BRANCH}}. Make commits and run tests.
|
|
10
|
+
|
|
11
|
+
# CONTEXT
|
|
12
|
+
|
|
13
|
+
Here are the last 10 commits:
|
|
14
|
+
|
|
15
|
+
<recent-commits>
|
|
16
|
+
|
|
17
|
+
!`git log -n 10 --format="%H%n%ad%n%B---" --date=short`
|
|
18
|
+
|
|
19
|
+
</recent-commits>
|
|
20
|
+
|
|
21
|
+
# EXPLORATION
|
|
22
|
+
|
|
23
|
+
Explore the repo and fill your context window with relevant information that will allow you to complete the task.
|
|
24
|
+
|
|
25
|
+
Pay extra attention to test files that touch the relevant parts of the code.
|
|
26
|
+
|
|
27
|
+
# EXECUTION
|
|
28
|
+
|
|
29
|
+
If applicable, use RGR (red-green-refactor) to complete the task.
|
|
30
|
+
|
|
31
|
+
1. RED: write one test
|
|
32
|
+
2. GREEN: write the implementation to pass that test
|
|
33
|
+
3. REPEAT until done
|
|
34
|
+
4. REFACTOR the code
|
|
35
|
+
|
|
36
|
+
# FEEDBACK LOOPS
|
|
37
|
+
|
|
38
|
+
Before committing, run `npm run typecheck` and `npm run test` to ensure the type check and the tests pass.
|
|
39
|
+
|
|
40
|
+
# COMMIT
|
|
41
|
+
|
|
42
|
+
Make a git commit. The commit message must:
|
|
43
|
+
|
|
44
|
+
1. Start with `RALPH:` prefix
|
|
45
|
+
2. Include task completed + PRD reference
|
|
46
|
+
3. Key decisions made
|
|
47
|
+
4. Files changed
|
|
48
|
+
5. Blockers or notes for next iteration
|
|
49
|
+
|
|
50
|
+
Keep it concise.
|
|
51
|
+
|
|
52
|
+
# THE ISSUE
|
|
53
|
+
|
|
54
|
+
If the task is not complete, leave a comment on the issue with what was done.
|
|
55
|
+
|
|
56
|
+
Do not close the issue - this will be done later.
|
|
57
|
+
|
|
58
|
+
Once complete, output <promise>COMPLETE</promise>.
|
|
59
|
+
|
|
60
|
+
# FINAL RULES
|
|
61
|
+
|
|
62
|
+
ONLY WORK ON A SINGLE TASK.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
// Parallel Planner — three-phase orchestration loop
|
|
2
|
+
//
|
|
3
|
+
// This template drives a multi-phase workflow:
|
|
4
|
+
// Phase 1 (Plan): An opus agent analyzes open issues, builds a dependency
|
|
5
|
+
// graph, and outputs a <plan> JSON listing unblocked issues
|
|
6
|
+
// with their target branch names.
|
|
7
|
+
// Phase 2 (Execute): N sonnet agents run in parallel via Promise.allSettled,
|
|
8
|
+
// each working a single issue on its own branch.
|
|
9
|
+
// Phase 3 (Merge): A sonnet agent merges all branches that produced commits.
|
|
10
|
+
//
|
|
11
|
+
// The outer loop repeats up to MAX_ITERATIONS times so that newly unblocked
|
|
12
|
+
// issues are picked up after each round of merges.
|
|
13
|
+
//
|
|
14
|
+
// Usage:
|
|
15
|
+
// npx tsx .sandcastle/main.mts
|
|
16
|
+
// Or add to package.json:
|
|
17
|
+
// "scripts": { "sandcastle": "npx tsx .sandcastle/main.mts" }
|
|
18
|
+
|
|
19
|
+
import * as sandcastle from "@ai-hero/sandcastle";
|
|
20
|
+
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
21
|
+
import { z } from "zod";
|
|
22
|
+
|
|
23
|
+
// The planner emits its plan as JSON inside <plan> tags; Output.object extracts
|
|
24
|
+
// and validates it against this schema. We use Zod here, but any Standard
|
|
25
|
+
// Schema validator works just as well — Valibot, ArkType, etc. See
|
|
26
|
+
// https://standardschema.dev.
|
|
27
|
+
const planSchema = z.object({
|
|
28
|
+
issues: z.array(
|
|
29
|
+
z.object({ id: z.string(), title: z.string(), branch: z.string() }),
|
|
30
|
+
),
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// Configuration
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
// Maximum number of plan→execute→merge cycles before stopping.
|
|
38
|
+
// Raise this if your backlog is large; lower it for a quick smoke-test run.
|
|
39
|
+
const MAX_ITERATIONS = 10;
|
|
40
|
+
|
|
41
|
+
// Hooks run inside the sandbox before the agent starts each iteration.
|
|
42
|
+
// npm install ensures the sandbox always has fresh dependencies.
|
|
43
|
+
const hooks = {
|
|
44
|
+
sandbox: { onSandboxReady: [{ command: "npm install" }] },
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
// Copy node_modules from the host into the worktree before each sandbox
|
|
48
|
+
// starts. Avoids a full npm install from scratch; the hook above handles
|
|
49
|
+
// platform-specific binaries and any packages added since the last copy.
|
|
50
|
+
const copyToWorktree = ["node_modules"];
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Main loop
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
|
|
57
|
+
console.log(`\n=== Iteration ${iteration}/${MAX_ITERATIONS} ===\n`);
|
|
58
|
+
|
|
59
|
+
// -------------------------------------------------------------------------
|
|
60
|
+
// Phase 1: Plan
|
|
61
|
+
//
|
|
62
|
+
// The planning agent (opus, for deeper reasoning) reads the open issue list,
|
|
63
|
+
// builds a dependency graph, and selects the issues that can be worked in
|
|
64
|
+
// parallel right now (i.e., no blocking dependencies on other open issues).
|
|
65
|
+
//
|
|
66
|
+
// It outputs a <plan> JSON block — Output.object parses and validates it.
|
|
67
|
+
// -------------------------------------------------------------------------
|
|
68
|
+
const plan = await sandcastle.run({
|
|
69
|
+
hooks,
|
|
70
|
+
sandbox: docker(),
|
|
71
|
+
name: "planner",
|
|
72
|
+
// One iteration is enough: the planner just needs to read and reason,
|
|
73
|
+
// not write code. (Structured output requires maxIterations: 1.)
|
|
74
|
+
maxIterations: 1,
|
|
75
|
+
// Planning benefits from a deeper-reasoning model (e.g. Opus). NOTE: the agent line below names no model — it only sets default "claude-code".
|
|
76
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
77
|
+
promptFile: "./.sandcastle/plan-prompt.md",
|
|
78
|
+
// Extract and validate the <plan> JSON into a typed object. Throws
|
|
79
|
+
// StructuredOutputError if the tag is missing, the JSON is malformed, or
|
|
80
|
+
// validation fails — which aborts the loop.
|
|
81
|
+
output: sandcastle.Output.object({ tag: "plan", schema: planSchema }),
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
const issues = plan.output.issues;
|
|
85
|
+
|
|
86
|
+
if (issues.length === 0) {
|
|
87
|
+
// No unblocked work — either everything is done or everything is blocked.
|
|
88
|
+
console.log("No unblocked issues to work on. Exiting.");
|
|
89
|
+
break;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
console.log(
|
|
93
|
+
`Planning complete. ${issues.length} issue(s) to work in parallel:`,
|
|
94
|
+
);
|
|
95
|
+
for (const issue of issues) {
|
|
96
|
+
console.log(` ${issue.id}: ${issue.title} → ${issue.branch}`);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// -------------------------------------------------------------------------
|
|
100
|
+
// Phase 2: Execute
|
|
101
|
+
//
|
|
102
|
+
// Spawn one sonnet agent per issue, all running concurrently.
|
|
103
|
+
// Each agent works on its own branch so there are no conflicts during
|
|
104
|
+
// execution — merging happens in Phase 3.
|
|
105
|
+
//
|
|
106
|
+
// Promise.allSettled means one failing agent doesn't cancel the others.
|
|
107
|
+
// -------------------------------------------------------------------------
|
|
108
|
+
const settled = await Promise.allSettled(
|
|
109
|
+
issues.map((issue) =>
|
|
110
|
+
sandcastle.run({
|
|
111
|
+
hooks,
|
|
112
|
+
copyToWorktree,
|
|
113
|
+
// Each agent starts on its own branch via branchStrategy on run().
|
|
114
|
+
sandbox: docker(),
|
|
115
|
+
branchStrategy: { type: "branch", branch: issue.branch },
|
|
116
|
+
name: "implementer",
|
|
117
|
+
// Give each agent plenty of room to implement and iterate on tests.
|
|
118
|
+
maxIterations: 100,
|
|
119
|
+
// A fast model (e.g. Sonnet) is capable enough for typical issue work. NOTE: the agent line below names no model — it only sets default "claude-code".
|
|
120
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
121
|
+
promptFile: "./.sandcastle/implement-prompt.md",
|
|
122
|
+
// Prompt arguments substitute {{TASK_ID}}, {{ISSUE_TITLE}},
|
|
123
|
+
// and {{BRANCH}} placeholders in implement-prompt.md before the
|
|
124
|
+
// agent sees the prompt.
|
|
125
|
+
promptArgs: {
|
|
126
|
+
TASK_ID: issue.id,
|
|
127
|
+
ISSUE_TITLE: issue.title,
|
|
128
|
+
BRANCH: issue.branch,
|
|
129
|
+
},
|
|
130
|
+
}),
|
|
131
|
+
),
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
// Log any agents that threw (network error, sandbox crash, etc.).
|
|
135
|
+
for (const [i, outcome] of settled.entries()) {
|
|
136
|
+
if (outcome.status === "rejected") {
|
|
137
|
+
console.error(
|
|
138
|
+
` ✗ ${issues[i]!.id} (${issues[i]!.branch}) failed: ${outcome.reason}`,
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Only pass branches that actually produced commits to the merge phase.
|
|
144
|
+
// An agent that ran successfully but made no commits has nothing to merge.
|
|
145
|
+
const completedIssues = settled
|
|
146
|
+
.map((outcome, i) => ({ outcome, issue: issues[i]! }))
|
|
147
|
+
.filter(
|
|
148
|
+
(
|
|
149
|
+
entry,
|
|
150
|
+
): entry is {
|
|
151
|
+
outcome: PromiseFulfilledResult<
|
|
152
|
+
Awaited<ReturnType<typeof sandcastle.run>>
|
|
153
|
+
>;
|
|
154
|
+
issue: (typeof issues)[number];
|
|
155
|
+
} =>
|
|
156
|
+
entry.outcome.status === "fulfilled" &&
|
|
157
|
+
entry.outcome.value.commits.length > 0,
|
|
158
|
+
)
|
|
159
|
+
.map((entry) => entry.issue);
|
|
160
|
+
|
|
161
|
+
const completedBranches = completedIssues.map((i) => i.branch);
|
|
162
|
+
|
|
163
|
+
console.log(
|
|
164
|
+
`\nExecution complete. ${completedBranches.length} branch(es) with commits:`,
|
|
165
|
+
);
|
|
166
|
+
for (const branch of completedBranches) {
|
|
167
|
+
console.log(` ${branch}`);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if (completedBranches.length === 0) {
|
|
171
|
+
// No agent produced commits (each run either failed or made no changes) — nothing to merge this cycle.
|
|
172
|
+
console.log("No commits produced. Nothing to merge.");
|
|
173
|
+
continue;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// -------------------------------------------------------------------------
|
|
177
|
+
// Phase 3: Merge
|
|
178
|
+
//
|
|
179
|
+
// One sonnet agent merges all completed branches into the current branch,
|
|
180
|
+
// resolving any conflicts and running tests to confirm everything still works.
|
|
181
|
+
//
|
|
182
|
+
// The {{BRANCHES}} and {{ISSUES}} prompt arguments are lists that the agent
|
|
183
|
+
// uses to know which branches to merge and which issues to close.
|
|
184
|
+
// -------------------------------------------------------------------------
|
|
185
|
+
await sandcastle.run({
|
|
186
|
+
hooks,
|
|
187
|
+
sandbox: docker(),
|
|
188
|
+
name: "merger",
|
|
189
|
+
maxIterations: 1,
|
|
190
|
+
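// A fast model (e.g. Sonnet) is sufficient for merge conflict resolution. NOTE: the agent line below names no model — it only sets default "claude-code".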
|
|
191
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
192
|
+
promptFile: "./.sandcastle/merge-prompt.md",
|
|
193
|
+
promptArgs: {
|
|
194
|
+
// A markdown list of branch names, one per line.
|
|
195
|
+
BRANCHES: completedBranches.map((b) => `- ${b}`).join("\n"),
|
|
196
|
+
// A markdown list of issue IDs and titles, one per line.
|
|
197
|
+
ISSUES: completedIssues.map((i) => `- ${i.id}: ${i.title}`).join("\n"),
|
|
198
|
+
},
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
console.log("\nBranches merged.");
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
console.log("\nAll done.");
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Merge the following branches into the current branch:
|
|
4
|
+
|
|
5
|
+
{{BRANCHES}}
|
|
6
|
+
|
|
7
|
+
For each branch:
|
|
8
|
+
|
|
9
|
+
1. Run `git merge <branch> --no-edit`
|
|
10
|
+
2. If there are merge conflicts, resolve them intelligently by reading both sides and choosing the correct resolution
|
|
11
|
+
3. After resolving conflicts, run `npm run typecheck` and `npm run test` to verify everything works
|
|
12
|
+
4. If tests fail, fix the issues before proceeding to the next branch
|
|
13
|
+
|
|
14
|
+
After all branches are merged, make a single commit summarizing the merge.
|
|
15
|
+
|
|
16
|
+
# CLOSE ISSUES
|
|
17
|
+
|
|
18
|
+
For each branch that was merged, close its issue using the following command:
|
|
19
|
+
|
|
20
|
+
`{{CLOSE_TASK_COMMAND}}`
|
|
21
|
+
|
|
22
|
+
Here are all the issues:
|
|
23
|
+
|
|
24
|
+
{{ISSUES}}
|
|
25
|
+
|
|
26
|
+
Once you've merged everything you can, output <promise>COMPLETE</promise>.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# ISSUES
|
|
2
|
+
|
|
3
|
+
Here are the open issues in the repo:
|
|
4
|
+
|
|
5
|
+
<issues-json>
|
|
6
|
+
|
|
7
|
+
!`{{LIST_TASKS_COMMAND}}`
|
|
8
|
+
|
|
9
|
+
</issues-json>
|
|
10
|
+
|
|
11
|
+
The list above has already been filtered to issues ready for work.
|
|
12
|
+
|
|
13
|
+
# TASK
|
|
14
|
+
|
|
15
|
+
Analyze the open issues and build a dependency graph. For each issue, determine whether it **blocks** or **is blocked by** any other open issue.
|
|
16
|
+
|
|
17
|
+
An issue B is **blocked by** issue A if:
|
|
18
|
+
|
|
19
|
+
- B requires code or infrastructure that A introduces
|
|
20
|
+
- B and A modify overlapping files or modules, making concurrent work likely to produce merge conflicts
|
|
21
|
+
- B's requirements depend on a decision or API shape that A will establish
|
|
22
|
+
|
|
23
|
+
An issue is **unblocked** if it has zero blocking dependencies on other open issues.
|
|
24
|
+
|
|
25
|
+
For each unblocked issue, assign a branch name using the exact format `sandcastle/issue-{id}` (no slug or other suffix). This must be deterministic so that re-planning the same issue always produces the same branch name and accumulated progress is preserved.
|
|
26
|
+
|
|
27
|
+
# OUTPUT
|
|
28
|
+
|
|
29
|
+
Output your plan as a JSON object wrapped in `<plan>` tags:
|
|
30
|
+
|
|
31
|
+
<plan>
|
|
32
|
+
{"issues": [{"id": "42", "title": "Fix auth bug", "branch": "sandcastle/issue-42"}]}
|
|
33
|
+
</plan>
|
|
34
|
+
|
|
35
|
+
Include only unblocked issues. If every issue is blocked, include the single highest-priority candidate (the one with the fewest or weakest dependencies).
|
|
36
|
+
|
|
37
|
+
Always emit the `<plan>` tags, even when there is nothing to do. If there are no issues to work on at all, output `<plan>{"issues": []}</plan>` so the run can exit cleanly.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Coding Standards
|
|
2
|
+
|
|
3
|
+
<!-- Customize this file with your project's coding standards.
|
|
4
|
+
The reviewer agent loads it during code review via @.sandcastle/CODING_STANDARDS.md
|
|
5
|
+
so these standards are enforced during review without costing tokens during implementation. -->
|
|
6
|
+
|
|
7
|
+
## Style
|
|
8
|
+
|
|
9
|
+
<!-- Example:
|
|
10
|
+
- Use camelCase for variables and functions
|
|
11
|
+
- Use PascalCase for classes and types
|
|
12
|
+
- Prefer named exports over default exports
|
|
13
|
+
-->
|
|
14
|
+
|
|
15
|
+
## Testing
|
|
16
|
+
|
|
17
|
+
<!-- Example:
|
|
18
|
+
- Every public function must have at least one test
|
|
19
|
+
- Use descriptive test names that explain the expected behavior
|
|
20
|
+
-->
|
|
21
|
+
|
|
22
|
+
## Architecture
|
|
23
|
+
|
|
24
|
+
<!-- Example:
|
|
25
|
+
- Keep modules focused on a single responsibility
|
|
26
|
+
- Prefer composition over inheritance
|
|
27
|
+
-->
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Fix issue {{TASK_ID}}: {{ISSUE_TITLE}}
|
|
4
|
+
|
|
5
|
+
Pull in the issue using `{{VIEW_TASK_COMMAND}}`. If it has a parent PRD, pull that in too.
|
|
6
|
+
|
|
7
|
+
Only work on the issue specified.
|
|
8
|
+
|
|
9
|
+
Work on branch {{BRANCH}}. Make commits and run tests.
|
|
10
|
+
|
|
11
|
+
# CONTEXT
|
|
12
|
+
|
|
13
|
+
Here are the last 10 commits:
|
|
14
|
+
|
|
15
|
+
<recent-commits>
|
|
16
|
+
|
|
17
|
+
!`git log -n 10 --format="%H%n%ad%n%B---" --date=short`
|
|
18
|
+
|
|
19
|
+
</recent-commits>
|
|
20
|
+
|
|
21
|
+
# EXPLORATION
|
|
22
|
+
|
|
23
|
+
Explore the repo and fill your context window with relevant information that will allow you to complete the task.
|
|
24
|
+
|
|
25
|
+
Pay extra attention to test files that touch the relevant parts of the code.
|
|
26
|
+
|
|
27
|
+
# EXECUTION
|
|
28
|
+
|
|
29
|
+
If applicable, use RGR (red-green-refactor) to complete the task.
|
|
30
|
+
|
|
31
|
+
1. RED: write one test
|
|
32
|
+
2. GREEN: write the implementation to pass that test
|
|
33
|
+
3. REPEAT until done
|
|
34
|
+
4. REFACTOR the code
|
|
35
|
+
|
|
36
|
+
# FEEDBACK LOOPS
|
|
37
|
+
|
|
38
|
+
Before committing, run `npm run typecheck` and `npm run test` to ensure the type check and the tests pass.
|
|
39
|
+
|
|
40
|
+
# COMMIT
|
|
41
|
+
|
|
42
|
+
Make a git commit. The commit message must:
|
|
43
|
+
|
|
44
|
+
1. Start with `RALPH:` prefix
|
|
45
|
+
2. Include task completed + PRD reference
|
|
46
|
+
3. Key decisions made
|
|
47
|
+
4. Files changed
|
|
48
|
+
5. Blockers or notes for next iteration
|
|
49
|
+
|
|
50
|
+
Keep it concise.
|
|
51
|
+
|
|
52
|
+
# THE ISSUE
|
|
53
|
+
|
|
54
|
+
If the task is not complete, leave a comment on the issue with what was done.
|
|
55
|
+
|
|
56
|
+
Do not close the issue - this will be done later.
|
|
57
|
+
|
|
58
|
+
Once complete, output <promise>COMPLETE</promise>.
|
|
59
|
+
|
|
60
|
+
# FINAL RULES
|
|
61
|
+
|
|
62
|
+
ONLY WORK ON A SINGLE TASK.
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
// Parallel Planner with Review — three-phase orchestration loop (plan, execute + review, merge)
|
|
2
|
+
//
|
|
3
|
+
// This template drives a multi-phase workflow:
|
|
4
|
+
// Phase 1 (Plan): An opus agent analyzes open issues, builds a
|
|
5
|
+
// dependency graph, and outputs a <plan> JSON
|
|
6
|
+
// listing unblocked issues with branch names.
|
|
7
|
+
// Phase 2 (Execute + Review): For each issue, a sandbox is created via
|
|
8
|
+
// createSandbox(). The implementer runs first
|
|
9
|
+
// (100 iterations). If it produces commits, a
|
|
10
|
+
// reviewer runs in the same sandbox on the same
|
|
11
|
+
// branch (1 iteration). All issue pipelines run
|
|
12
|
+
// concurrently via Promise.allSettled().
|
|
13
|
+
// Phase 3 (Merge): A single agent merges all completed branches
|
|
14
|
+
// into the current branch.
|
|
15
|
+
//
|
|
16
|
+
// The outer loop repeats up to MAX_ITERATIONS times so that newly unblocked
|
|
17
|
+
// issues are picked up after each round of merges.
|
|
18
|
+
//
|
|
19
|
+
// Usage:
|
|
20
|
+
// npx tsx .sandcastle/main.mts
|
|
21
|
+
// Or add to package.json:
|
|
22
|
+
// "scripts": { "sandcastle": "npx tsx .sandcastle/main.mts" }
|
|
23
|
+
|
|
24
|
+
import * as sandcastle from "@ai-hero/sandcastle";
|
|
25
|
+
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
26
|
+
import { z } from "zod";
|
|
27
|
+
|
|
28
|
+
// The planner emits its plan as JSON inside <plan> tags; Output.object extracts
|
|
29
|
+
// and validates it against this schema. We use Zod here, but any Standard
|
|
30
|
+
// Schema validator works just as well — Valibot, ArkType, etc. See
|
|
31
|
+
// https://standardschema.dev.
|
|
32
|
+
const planSchema = z.object({
|
|
33
|
+
issues: z.array(
|
|
34
|
+
z.object({ id: z.string(), title: z.string(), branch: z.string() }),
|
|
35
|
+
),
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Configuration
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
// Maximum number of plan→execute→merge cycles before stopping.
|
|
43
|
+
// Raise this if your backlog is large; lower it for a quick smoke-test run.
|
|
44
|
+
const MAX_ITERATIONS = 10;
|
|
45
|
+
|
|
46
|
+
// Hooks run inside the sandbox before the agent starts each iteration.
|
|
47
|
+
// npm install ensures the sandbox always has fresh dependencies.
|
|
48
|
+
const hooks = {
|
|
49
|
+
sandbox: { onSandboxReady: [{ command: "npm install" }] },
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
// Copy node_modules from the host into the worktree before each sandbox
|
|
53
|
+
// starts. Avoids a full npm install from scratch; the hook above handles
|
|
54
|
+
// platform-specific binaries and any packages added since the last copy.
|
|
55
|
+
const copyToWorktree = ["node_modules"];
|
|
56
|
+
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Main loop
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
|
|
62
|
+
console.log(`\n=== Iteration ${iteration}/${MAX_ITERATIONS} ===\n`);
|
|
63
|
+
|
|
64
|
+
// -------------------------------------------------------------------------
|
|
65
|
+
// Phase 1: Plan
|
|
66
|
+
//
|
|
67
|
+
// The planning agent (opus, for deeper reasoning) reads the open issue list,
|
|
68
|
+
// builds a dependency graph, and selects the issues that can be worked in
|
|
69
|
+
// parallel right now (i.e., no blocking dependencies on other open issues).
|
|
70
|
+
//
|
|
71
|
+
// It outputs a <plan> JSON block — Output.object parses and validates it.
|
|
72
|
+
// -------------------------------------------------------------------------
|
|
73
|
+
const plan = await sandcastle.run({
|
|
74
|
+
hooks,
|
|
75
|
+
sandbox: docker(),
|
|
76
|
+
name: "planner",
|
|
77
|
+
// One iteration is enough: the planner just needs to read and reason,
|
|
78
|
+
// not write code. (Structured output requires maxIterations: 1.)
|
|
79
|
+
maxIterations: 1,
|
|
80
|
+
// Planning benefits from a deeper-reasoning model (e.g. Opus). NOTE: the agent line below names no model — it only sets default "claude-code".
|
|
81
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
82
|
+
promptFile: "./.sandcastle/plan-prompt.md",
|
|
83
|
+
// Extract and validate the <plan> JSON into a typed object. Throws
|
|
84
|
+
// StructuredOutputError if the tag is missing, the JSON is malformed, or
|
|
85
|
+
// validation fails — which aborts the loop.
|
|
86
|
+
output: sandcastle.Output.object({ tag: "plan", schema: planSchema }),
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
const issues = plan.output.issues;
|
|
90
|
+
|
|
91
|
+
if (issues.length === 0) {
|
|
92
|
+
// No unblocked work — either everything is done or everything is blocked.
|
|
93
|
+
console.log("No unblocked issues to work on. Exiting.");
|
|
94
|
+
break;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
console.log(
|
|
98
|
+
`Planning complete. ${issues.length} issue(s) to work in parallel:`,
|
|
99
|
+
);
|
|
100
|
+
for (const issue of issues) {
|
|
101
|
+
console.log(` ${issue.id}: ${issue.title} → ${issue.branch}`);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// -------------------------------------------------------------------------
|
|
105
|
+
// Phase 2: Execute + Review
|
|
106
|
+
//
|
|
107
|
+
// For each issue, create a sandbox via createSandbox() so the implementer
|
|
108
|
+
// and reviewer share the same sandbox instance per branch. The implementer
|
|
109
|
+
// runs first; if it produces commits, the reviewer runs in the same sandbox.
|
|
110
|
+
//
|
|
111
|
+
// Promise.allSettled means one failing pipeline doesn't cancel the others.
|
|
112
|
+
// -------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
const settled = await Promise.allSettled(
|
|
115
|
+
issues.map(async (issue) => {
|
|
116
|
+
const sandbox = await sandcastle.createSandbox({
|
|
117
|
+
branch: issue.branch,
|
|
118
|
+
sandbox: docker(),
|
|
119
|
+
hooks,
|
|
120
|
+
copyToWorktree,
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
// Run the implementer
|
|
125
|
+
const implement = await sandbox.run({
|
|
126
|
+
name: "implementer",
|
|
127
|
+
maxIterations: 100,
|
|
128
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
129
|
+
promptFile: "./.sandcastle/implement-prompt.md",
|
|
130
|
+
promptArgs: {
|
|
131
|
+
TASK_ID: issue.id,
|
|
132
|
+
ISSUE_TITLE: issue.title,
|
|
133
|
+
BRANCH: issue.branch,
|
|
134
|
+
},
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
// Only review if the implementer produced commits
|
|
138
|
+
if (implement.commits.length > 0) {
|
|
139
|
+
const review = await sandbox.run({
|
|
140
|
+
name: "reviewer",
|
|
141
|
+
maxIterations: 1,
|
|
142
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
143
|
+
promptFile: "./.sandcastle/review-prompt.md",
|
|
144
|
+
promptArgs: {
|
|
145
|
+
BRANCH: issue.branch,
|
|
146
|
+
},
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
// Combine the commits from both runs so the merge phase sees all of them.
|
|
150
|
+
// Each sandbox.run() only returns commits from its own run.
|
|
151
|
+
return {
|
|
152
|
+
...review,
|
|
153
|
+
commits: [...implement.commits, ...review.commits],
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return implement;
|
|
158
|
+
} finally {
|
|
159
|
+
await sandbox.close();
|
|
160
|
+
}
|
|
161
|
+
}),
|
|
162
|
+
);
|
|
163
|
+
|
|
164
|
+
// Log any agents that threw (network error, sandbox crash, etc.).
|
|
165
|
+
for (const [i, outcome] of settled.entries()) {
|
|
166
|
+
if (outcome.status === "rejected") {
|
|
167
|
+
console.error(
|
|
168
|
+
` ✗ ${issues[i]!.id} (${issues[i]!.branch}) failed: ${outcome.reason}`,
|
|
169
|
+
);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Only pass branches that actually produced commits to the merge phase.
|
|
174
|
+
// An agent that ran successfully but made no commits has nothing to merge.
|
|
175
|
+
const completedIssues = settled
|
|
176
|
+
.map((outcome, i) => ({ outcome, issue: issues[i]! }))
|
|
177
|
+
.filter(
|
|
178
|
+
(entry) =>
|
|
179
|
+
entry.outcome.status === "fulfilled" &&
|
|
180
|
+
entry.outcome.value.commits.length > 0,
|
|
181
|
+
)
|
|
182
|
+
.map((entry) => entry.issue);
|
|
183
|
+
|
|
184
|
+
const completedBranches = completedIssues.map((i) => i.branch);
|
|
185
|
+
|
|
186
|
+
console.log(
|
|
187
|
+
`\nExecution complete. ${completedBranches.length} branch(es) with commits:`,
|
|
188
|
+
);
|
|
189
|
+
for (const branch of completedBranches) {
|
|
190
|
+
console.log(` ${branch}`);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
if (completedBranches.length === 0) {
|
|
194
|
+
// No pipeline produced commits (each either failed or made no changes) — nothing to merge this cycle.
|
|
195
|
+
console.log("No commits produced. Nothing to merge.");
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// -------------------------------------------------------------------------
|
|
200
|
+
// Phase 3: Merge
|
|
201
|
+
//
|
|
202
|
+
// One agent merges all completed branches into the current branch,
|
|
203
|
+
// resolving any conflicts and running tests to confirm everything works.
|
|
204
|
+
//
|
|
205
|
+
// The {{BRANCHES}} and {{ISSUES}} prompt arguments are lists that the agent
|
|
206
|
+
// uses to know which branches to merge and which issues to close.
|
|
207
|
+
// -------------------------------------------------------------------------
|
|
208
|
+
await sandcastle.run({
|
|
209
|
+
hooks,
|
|
210
|
+
sandbox: docker(),
|
|
211
|
+
name: "merger",
|
|
212
|
+
maxIterations: 1,
|
|
213
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
214
|
+
promptFile: "./.sandcastle/merge-prompt.md",
|
|
215
|
+
promptArgs: {
|
|
216
|
+
// A markdown list of branch names, one per line.
|
|
217
|
+
BRANCHES: completedBranches.map((b) => `- ${b}`).join("\n"),
|
|
218
|
+
// A markdown list of issue IDs and titles, one per line.
|
|
219
|
+
ISSUES: completedIssues.map((i) => `- ${i.id}: ${i.title}`).join("\n"),
|
|
220
|
+
},
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
console.log("\nBranches merged.");
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
console.log("\nAll done.");
|