@rockclaver/sandcastle 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1355 -0
- package/dist/MountConfig-CmXclHA5.d.ts +26 -0
- package/dist/SandboxProvider-EkSMuBp8.d.ts +243 -0
- package/dist/chunk-72UVAC7B.js +99 -0
- package/dist/chunk-72UVAC7B.js.map +1 -0
- package/dist/chunk-BIWNFKGV.js +22 -0
- package/dist/chunk-BIWNFKGV.js.map +1 -0
- package/dist/chunk-FKX3DRTL.js +362 -0
- package/dist/chunk-FKX3DRTL.js.map +1 -0
- package/dist/chunk-NGBM7T3E.js +76 -0
- package/dist/chunk-NGBM7T3E.js.map +1 -0
- package/dist/chunk-QCLZLPJ7.js +26431 -0
- package/dist/chunk-QCLZLPJ7.js.map +1 -0
- package/dist/chunk-VAKEM3U2.js +26997 -0
- package/dist/chunk-VAKEM3U2.js.map +1 -0
- package/dist/index.d.ts +943 -0
- package/dist/index.js +2393 -0
- package/dist/index.js.map +1 -0
- package/dist/main.d.ts +1 -0
- package/dist/main.js +19268 -0
- package/dist/main.js.map +1 -0
- package/dist/mountUtils-CCA-bbpK.d.ts +25 -0
- package/dist/sandboxes/daytona.d.ts +60 -0
- package/dist/sandboxes/daytona.js +122 -0
- package/dist/sandboxes/daytona.js.map +1 -0
- package/dist/sandboxes/docker.d.ts +110 -0
- package/dist/sandboxes/docker.js +9 -0
- package/dist/sandboxes/docker.js.map +1 -0
- package/dist/sandboxes/no-sandbox.d.ts +38 -0
- package/dist/sandboxes/no-sandbox.js +7 -0
- package/dist/sandboxes/no-sandbox.js.map +1 -0
- package/dist/sandboxes/podman.d.ts +124 -0
- package/dist/sandboxes/podman.js +299 -0
- package/dist/sandboxes/podman.js.map +1 -0
- package/dist/sandboxes/vercel.d.ts +104 -0
- package/dist/sandboxes/vercel.js +148 -0
- package/dist/sandboxes/vercel.js.map +1 -0
- package/dist/templates/blank/main.mts +14 -0
- package/dist/templates/blank/prompt.md +12 -0
- package/dist/templates/blank/template.json +4 -0
- package/dist/templates/parallel-planner/implement-prompt.md +62 -0
- package/dist/templates/parallel-planner/main.mts +204 -0
- package/dist/templates/parallel-planner/merge-prompt.md +26 -0
- package/dist/templates/parallel-planner/plan-prompt.md +37 -0
- package/dist/templates/parallel-planner/template.json +4 -0
- package/dist/templates/parallel-planner-with-review/CODING_STANDARDS.md +27 -0
- package/dist/templates/parallel-planner-with-review/implement-prompt.md +62 -0
- package/dist/templates/parallel-planner-with-review/main.mts +226 -0
- package/dist/templates/parallel-planner-with-review/merge-prompt.md +26 -0
- package/dist/templates/parallel-planner-with-review/plan-prompt.md +37 -0
- package/dist/templates/parallel-planner-with-review/review-prompt.md +55 -0
- package/dist/templates/parallel-planner-with-review/template.json +4 -0
- package/dist/templates/sequential-reviewer/CODING_STANDARDS.md +27 -0
- package/dist/templates/sequential-reviewer/implement-prompt.md +53 -0
- package/dist/templates/sequential-reviewer/main.mts +119 -0
- package/dist/templates/sequential-reviewer/review-prompt.md +55 -0
- package/dist/templates/sequential-reviewer/template.json +4 -0
- package/dist/templates/simple-loop/main.mts +49 -0
- package/dist/templates/simple-loop/prompt.md +53 -0
- package/dist/templates/simple-loop/template.json +4 -0
- package/package.json +104 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Merge the following branches into the current branch:
|
|
4
|
+
|
|
5
|
+
{{BRANCHES}}
|
|
6
|
+
|
|
7
|
+
For each branch:
|
|
8
|
+
|
|
9
|
+
1. Run `git merge <branch> --no-edit`
|
|
10
|
+
2. If there are merge conflicts, resolve them intelligently by reading both sides and choosing the correct resolution
|
|
11
|
+
3. After resolving conflicts, run `npm run typecheck` and `npm run test` to verify everything works
|
|
12
|
+
4. If tests fail, fix the issues before proceeding to the next branch
|
|
13
|
+
|
|
14
|
+
After all branches are merged, make a single commit summarizing the merge.
|
|
15
|
+
|
|
16
|
+
# CLOSE ISSUES
|
|
17
|
+
|
|
18
|
+
For each branch that was merged, close its issue using the following command:
|
|
19
|
+
|
|
20
|
+
`{{CLOSE_TASK_COMMAND}}`
|
|
21
|
+
|
|
22
|
+
Here are all the issues:
|
|
23
|
+
|
|
24
|
+
{{ISSUES}}
|
|
25
|
+
|
|
26
|
+
Once you've merged everything you can, output <promise>COMPLETE</promise>.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# ISSUES
|
|
2
|
+
|
|
3
|
+
Here are the open issues in the repo:
|
|
4
|
+
|
|
5
|
+
<issues-json>
|
|
6
|
+
|
|
7
|
+
!`{{LIST_TASKS_COMMAND}}`
|
|
8
|
+
|
|
9
|
+
</issues-json>
|
|
10
|
+
|
|
11
|
+
The list above has already been filtered to issues ready for work.
|
|
12
|
+
|
|
13
|
+
# TASK
|
|
14
|
+
|
|
15
|
+
Analyze the open issues and build a dependency graph. For each issue, determine whether it **blocks** or **is blocked by** any other open issue.
|
|
16
|
+
|
|
17
|
+
An issue B is **blocked by** issue A if:
|
|
18
|
+
|
|
19
|
+
- B requires code or infrastructure that A introduces
|
|
20
|
+
- B and A modify overlapping files or modules, making concurrent work likely to produce merge conflicts
|
|
21
|
+
- B's requirements depend on a decision or API shape that A will establish
|
|
22
|
+
|
|
23
|
+
An issue is **unblocked** if it has zero blocking dependencies on other open issues.
|
|
24
|
+
|
|
25
|
+
For each unblocked issue, assign a branch name using the exact format `sandcastle/issue-{id}` (no slug or other suffix). This must be deterministic so that re-planning the same issue always produces the same branch name and accumulated progress is preserved.
|
|
26
|
+
|
|
27
|
+
# OUTPUT
|
|
28
|
+
|
|
29
|
+
Output your plan as a JSON object wrapped in `<plan>` tags:
|
|
30
|
+
|
|
31
|
+
<plan>
|
|
32
|
+
{"issues": [{"id": "42", "title": "Fix auth bug", "branch": "sandcastle/issue-42"}]}
|
|
33
|
+
</plan>
|
|
34
|
+
|
|
35
|
+
Include only unblocked issues. If every issue is blocked, include the single highest-priority candidate (the one with the fewest or weakest dependencies).
|
|
36
|
+
|
|
37
|
+
Always emit the `<plan>` tags, even when there is nothing to do. If there are no issues to work on at all, output `<plan>{"issues": []}</plan>` so the run can exit cleanly.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Review the code changes on branch `{{BRANCH}}` and improve code clarity, consistency, and maintainability while preserving exact functionality.
|
|
4
|
+
|
|
5
|
+
# CONTEXT
|
|
6
|
+
|
|
7
|
+
## Branch diff
|
|
8
|
+
|
|
9
|
+
!`git diff {{TARGET_BRANCH}}...{{BRANCH}}`
|
|
10
|
+
|
|
11
|
+
## Commits on this branch
|
|
12
|
+
|
|
13
|
+
!`git log {{TARGET_BRANCH}}..{{BRANCH}} --oneline`
|
|
14
|
+
|
|
15
|
+
# REVIEW PROCESS
|
|
16
|
+
|
|
17
|
+
1. **Understand the change**: Read the diff and commits above to understand the intent.
|
|
18
|
+
|
|
19
|
+
2. **Analyze for improvements**: Look for opportunities to:
|
|
20
|
+
- Reduce unnecessary complexity and nesting
|
|
21
|
+
- Eliminate redundant code and abstractions
|
|
22
|
+
- Improve readability through clear variable and function names
|
|
23
|
+
- Consolidate related logic
|
|
24
|
+
- Remove unnecessary comments that describe obvious code
|
|
25
|
+
- Avoid nested ternary operators - prefer switch statements or if/else chains
|
|
26
|
+
- Choose clarity over brevity - explicit code is often better than overly compact code
|
|
27
|
+
|
|
28
|
+
3. **Check correctness**:
|
|
29
|
+
- Does the implementation match the intent? Are edge cases handled?
|
|
30
|
+
- Are new/changed behaviours covered by tests?
|
|
31
|
+
- Are there unsafe casts, `any` types, or unchecked assumptions?
|
|
32
|
+
- Does the change introduce injection vulnerabilities, credential leaks, or other security issues?
|
|
33
|
+
|
|
34
|
+
4. **Maintain balance**: Avoid over-simplification that could:
|
|
35
|
+
- Reduce code clarity or maintainability
|
|
36
|
+
- Create overly clever solutions that are hard to understand
|
|
37
|
+
- Combine too many concerns into single functions or components
|
|
38
|
+
- Remove helpful abstractions that improve code organization
|
|
39
|
+
- Make the code harder to debug or extend
|
|
40
|
+
|
|
41
|
+
5. **Apply project standards**: Follow the coding standards defined in @.sandcastle/CODING_STANDARDS.md
|
|
42
|
+
|
|
43
|
+
6. **Preserve functionality**: Never change what the code does - only how it does it. All original features, outputs, and behaviors must remain intact.
|
|
44
|
+
|
|
45
|
+
# EXECUTION
|
|
46
|
+
|
|
47
|
+
If you find improvements to make:
|
|
48
|
+
|
|
49
|
+
1. Make the changes directly on this branch
|
|
50
|
+
2. Run tests and type checking to ensure nothing is broken
|
|
51
|
+
3. Commit the changes with a message describing the refinements
|
|
52
|
+
|
|
53
|
+
If the code is already clean and well-structured, do nothing.
|
|
54
|
+
|
|
55
|
+
Once complete, output <promise>COMPLETE</promise>.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Coding Standards
|
|
2
|
+
|
|
3
|
+
<!-- Customize this file with your project's coding standards.
|
|
4
|
+
The reviewer agent loads it during code review via @.sandcastle/CODING_STANDARDS.md
|
|
5
|
+
so these standards are enforced during review without costing tokens during implementation. -->
|
|
6
|
+
|
|
7
|
+
## Style
|
|
8
|
+
|
|
9
|
+
<!-- Example:
|
|
10
|
+
- Use camelCase for variables and functions
|
|
11
|
+
- Use PascalCase for classes and types
|
|
12
|
+
- Prefer named exports over default exports
|
|
13
|
+
-->
|
|
14
|
+
|
|
15
|
+
## Testing
|
|
16
|
+
|
|
17
|
+
<!-- Example:
|
|
18
|
+
- Every public function must have at least one test
|
|
19
|
+
- Use descriptive test names that explain the expected behavior
|
|
20
|
+
-->
|
|
21
|
+
|
|
22
|
+
## Architecture
|
|
23
|
+
|
|
24
|
+
<!-- Example:
|
|
25
|
+
- Keep modules focused on a single responsibility
|
|
26
|
+
- Prefer composition over inheritance
|
|
27
|
+
-->
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Context
|
|
2
|
+
|
|
3
|
+
## Open issues
|
|
4
|
+
|
|
5
|
+
!`{{LIST_TASKS_COMMAND}}`
|
|
6
|
+
|
|
7
|
+
The list above has already been filtered to issues ready for work and is the sole source of truth for what work exists. Do not run your own unfiltered query to find more issues — if the list is empty, there is nothing to do.
|
|
8
|
+
|
|
9
|
+
## Recent RALPH commits (last 10)
|
|
10
|
+
|
|
11
|
+
!`git log --oneline --grep="RALPH" -10`
|
|
12
|
+
|
|
13
|
+
# Task
|
|
14
|
+
|
|
15
|
+
You are RALPH — an autonomous coding agent working through issues one at a time.
|
|
16
|
+
|
|
17
|
+
## Priority order
|
|
18
|
+
|
|
19
|
+
Work on issues in this order:
|
|
20
|
+
|
|
21
|
+
1. **Bug fixes** — broken behaviour affecting users
|
|
22
|
+
2. **Tracer bullets** — thin end-to-end slices that prove an approach works
|
|
23
|
+
3. **Polish** — improving existing functionality (error messages, UX, docs)
|
|
24
|
+
4. **Refactors** — internal cleanups with no user-visible change
|
|
25
|
+
|
|
26
|
+
Pick the highest-priority open issue that is not blocked by another open issue.
|
|
27
|
+
|
|
28
|
+
## Workflow
|
|
29
|
+
|
|
30
|
+
1. **Explore** — read the issue carefully. Pull in the parent PRD if referenced. Read the relevant source files and tests before writing any code.
|
|
31
|
+
2. **Plan** — decide what to change and why. Keep the change as small as possible.
|
|
32
|
+
3. **Execute** — use RGR (Red → Green → Repeat → Refactor): write a failing test first, then write the implementation to pass it.
|
|
33
|
+
4. **Verify** — run `npm run typecheck` and `npm run test` before committing. Fix any failures before proceeding.
|
|
34
|
+
5. **Commit** — make a single git commit. The message MUST:
|
|
35
|
+
- Start with `RALPH:` prefix
|
|
36
|
+
- Include the task completed and any PRD reference
|
|
37
|
+
- List key decisions made
|
|
38
|
+
- List files changed
|
|
39
|
+
- Note any blockers for the next iteration
|
|
40
|
+
6. **Close** — close the issue with `{{CLOSE_TASK_COMMAND}}` explaining what was done.
|
|
41
|
+
|
|
42
|
+
## Rules
|
|
43
|
+
|
|
44
|
+
- Work on **one issue per iteration**. Do not attempt multiple issues in a single iteration.
|
|
45
|
+
- Do not close an issue until you have committed the fix and verified tests pass.
|
|
46
|
+
- Do not leave commented-out code or TODO comments in committed code.
|
|
47
|
+
- If you are blocked (missing context, failing tests you cannot fix, external dependency), leave a comment on the issue and move on — do not close it.
|
|
48
|
+
|
|
49
|
+
# Done
|
|
50
|
+
|
|
51
|
+
When all actionable issues are complete (or you are blocked on all remaining ones), or the open-issues block at the top of this prompt is empty, output the completion signal:
|
|
52
|
+
|
|
53
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
// Sequential Reviewer — implement-then-review loop
|
|
2
|
+
//
|
|
3
|
+
// This template drives a two-phase workflow per issue:
|
|
4
|
+
// Phase 1 (Implement): An agent (claude-code by default) picks an open issue, works on it
|
|
5
|
+
// on a dedicated branch, commits the changes, and signals
|
|
6
|
+
// completion.
|
|
7
|
+
// Phase 2 (Review): A second agent reviews the branch diff and either
|
|
8
|
+
// approves it or makes corrections directly on the branch.
|
|
9
|
+
//
|
|
10
|
+
// Both phases share a single sandbox created via createSandbox(), so the
|
|
11
|
+
// implementer and reviewer work on the same explicit branch.
|
|
12
|
+
//
|
|
13
|
+
// The outer loop repeats up to MAX_ITERATIONS times, processing one issue per
|
|
14
|
+
// iteration and stopping early once the backlog is exhausted (an implement
|
|
15
|
+
// phase that produces no commits). This is a middle-complexity option between
|
|
16
|
+
// the simple-loop (no review gate) and the parallel-planner (concurrent
|
|
17
|
+
// execution with a planning phase).
|
|
18
|
+
//
|
|
19
|
+
// Usage:
|
|
20
|
+
// npx tsx .sandcastle/main.mts
|
|
21
|
+
// Or add to package.json:
|
|
22
|
+
// "scripts": { "sandcastle": "npx tsx .sandcastle/main.mts" }
|
|
23
|
+
|
|
24
|
+
import * as sandcastle from "@ai-hero/sandcastle";
|
|
25
|
+
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Configuration
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// Maximum number of implement→review cycles to run before stopping.
|
|
32
|
+
// Each cycle works on one issue. Raise this to process more issues per run.
|
|
33
|
+
const MAX_ITERATIONS = 10;
|
|
34
|
+
|
|
35
|
+
// Hooks run inside the sandbox before the agent starts each iteration.
|
|
36
|
+
// npm install ensures the sandbox always has fresh dependencies.
|
|
37
|
+
const hooks = {
|
|
38
|
+
sandbox: { onSandboxReady: [{ command: "npm install" }] },
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
// Copy node_modules from the host into the worktree before each sandbox
|
|
42
|
+
// starts. Avoids a full npm install from scratch; the hook above handles
|
|
43
|
+
// platform-specific binaries and any packages added since the last copy.
|
|
44
|
+
const copyToWorktree = ["node_modules"];
|
|
45
|
+
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Main loop
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
|
|
51
|
+
console.log(`\n=== Iteration ${iteration}/${MAX_ITERATIONS} ===\n`);
|
|
52
|
+
|
|
53
|
+
// Generate a unique branch name for this iteration.
|
|
54
|
+
const branch = `sandcastle/sequential-reviewer/${Date.now()}`;
|
|
55
|
+
|
|
56
|
+
// Create a single sandbox that both the implementer and reviewer share.
|
|
57
|
+
// This gives both agents a real, named branch that persists across phases.
|
|
58
|
+
const sandbox = await sandcastle.createSandbox({
|
|
59
|
+
branch,
|
|
60
|
+
sandbox: docker(),
|
|
61
|
+
hooks,
|
|
62
|
+
copyToWorktree,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
try {
|
|
66
|
+
// -----------------------------------------------------------------------
|
|
67
|
+
// Phase 1: Implement
|
|
68
|
+
//
|
|
69
|
+
// An agent (claude-code by default) picks the next open issue, writes the
|
|
70
|
+
// implementation (using RGR: Red → Green → Repeat → Refactor), and
|
|
71
|
+
// commits the result.
|
|
72
|
+
//
|
|
73
|
+
// The agent signals completion via <promise>COMPLETE</promise> when done.
|
|
74
|
+
// -----------------------------------------------------------------------
|
|
75
|
+
// One iteration so each outer pass implements a single issue on its own
|
|
76
|
+
// branch, then hands it to the reviewer. A higher value lets the agent
|
|
77
|
+
// drain the whole backlog onto this one branch in a single pass, which
|
|
78
|
+
// defeats the per-issue review.
|
|
79
|
+
const implement = await sandbox.run({
|
|
80
|
+
name: "implementer",
|
|
81
|
+
maxIterations: 1,
|
|
82
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
83
|
+
promptFile: "./.sandcastle/implement-prompt.md",
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
if (!implement.commits.length) {
|
|
87
|
+
// No commits means the backlog is empty or every remaining issue is
|
|
88
|
+
// blocked — there is nothing left to implement or review, so stop.
|
|
89
|
+
console.log("Implementation agent made no commits. Stopping.");
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
console.log(`\nImplementation complete on branch: ${branch}`);
|
|
94
|
+
console.log(`Commits: ${implement.commits.length}`);
|
|
95
|
+
|
|
96
|
+
// -----------------------------------------------------------------------
|
|
97
|
+
// Phase 2: Review
|
|
98
|
+
//
|
|
99
|
+
// A second agent reviews the diff of the branch produced by
|
|
100
|
+
// Phase 1. It uses the {{BRANCH}} prompt argument to inspect the right
|
|
101
|
+
// branch, and either approves or makes corrections directly on the branch.
|
|
102
|
+
// -----------------------------------------------------------------------
|
|
103
|
+
await sandbox.run({
|
|
104
|
+
name: "reviewer",
|
|
105
|
+
maxIterations: 1,
|
|
106
|
+
agent: sandcastle.agent({ default: "claude-code" }),
|
|
107
|
+
promptFile: "./.sandcastle/review-prompt.md",
|
|
108
|
+
promptArgs: {
|
|
109
|
+
BRANCH: branch,
|
|
110
|
+
},
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
console.log("\nReview complete.");
|
|
114
|
+
} finally {
|
|
115
|
+
await sandbox.close();
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
console.log("\nAll done.");
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# TASK
|
|
2
|
+
|
|
3
|
+
Review the code changes on branch `{{BRANCH}}` and improve code clarity, consistency, and maintainability while preserving exact functionality.
|
|
4
|
+
|
|
5
|
+
# CONTEXT
|
|
6
|
+
|
|
7
|
+
## Branch diff
|
|
8
|
+
|
|
9
|
+
!`git diff {{TARGET_BRANCH}}...{{BRANCH}}`
|
|
10
|
+
|
|
11
|
+
## Commits on this branch
|
|
12
|
+
|
|
13
|
+
!`git log {{TARGET_BRANCH}}..{{BRANCH}} --oneline`
|
|
14
|
+
|
|
15
|
+
# REVIEW PROCESS
|
|
16
|
+
|
|
17
|
+
1. **Understand the change**: Read the diff and commits above to understand the intent.
|
|
18
|
+
|
|
19
|
+
2. **Analyze for improvements**: Look for opportunities to:
|
|
20
|
+
- Reduce unnecessary complexity and nesting
|
|
21
|
+
- Eliminate redundant code and abstractions
|
|
22
|
+
- Improve readability through clear variable and function names
|
|
23
|
+
- Consolidate related logic
|
|
24
|
+
- Remove unnecessary comments that describe obvious code
|
|
25
|
+
- Avoid nested ternary operators - prefer switch statements or if/else chains
|
|
26
|
+
- Choose clarity over brevity - explicit code is often better than overly compact code
|
|
27
|
+
|
|
28
|
+
3. **Check correctness**:
|
|
29
|
+
- Does the implementation match the intent? Are edge cases handled?
|
|
30
|
+
- Are new/changed behaviours covered by tests?
|
|
31
|
+
- Are there unsafe casts, `any` types, or unchecked assumptions?
|
|
32
|
+
- Does the change introduce injection vulnerabilities, credential leaks, or other security issues?
|
|
33
|
+
|
|
34
|
+
4. **Maintain balance**: Avoid over-simplification that could:
|
|
35
|
+
- Reduce code clarity or maintainability
|
|
36
|
+
- Create overly clever solutions that are hard to understand
|
|
37
|
+
- Combine too many concerns into single functions or components
|
|
38
|
+
- Remove helpful abstractions that improve code organization
|
|
39
|
+
- Make the code harder to debug or extend
|
|
40
|
+
|
|
41
|
+
5. **Apply project standards**: Follow the coding standards defined in @.sandcastle/CODING_STANDARDS.md
|
|
42
|
+
|
|
43
|
+
6. **Preserve functionality**: Never change what the code does - only how it does it. All original features, outputs, and behaviors must remain intact.
|
|
44
|
+
|
|
45
|
+
# EXECUTION
|
|
46
|
+
|
|
47
|
+
If you find improvements to make:
|
|
48
|
+
|
|
49
|
+
1. Make the changes directly on this branch
|
|
50
|
+
2. Run tests and type checking to ensure nothing is broken
|
|
51
|
+
3. Commit the changes with a message describing the refinements
|
|
52
|
+
|
|
53
|
+
If the code is already clean and well-structured, do nothing.
|
|
54
|
+
|
|
55
|
+
Once complete, output <promise>COMPLETE</promise>.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { run, agent } from "@ai-hero/sandcastle";
|
|
2
|
+
import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
|
|
3
|
+
|
|
4
|
+
// Simple loop: an agent that picks open issues one by one and closes them.
|
|
5
|
+
// Run this with: npx tsx .sandcastle/main.mts
|
|
6
|
+
// Or add to package.json scripts: "sandcastle": "npx tsx .sandcastle/main.mts"
|
|
7
|
+
|
|
8
|
+
await run({
|
|
9
|
+
// A name for this run, shown as a prefix in log output.
|
|
10
|
+
name: "worker",
|
|
11
|
+
|
|
12
|
+
// Sandbox provider — runs the agent inside an isolated container.
|
|
13
|
+
sandbox: docker(),
|
|
14
|
+
|
|
15
|
+
// The agent provider is resolved at runtime by agent(): the AGENT env var
|
|
16
|
+
// (or this baked default) picks the provider, AGENT_MODEL picks the model.
|
|
17
|
+
agent: agent({ default: "claude-code" }),
|
|
18
|
+
|
|
19
|
+
// Path to the prompt file. Shell expressions inside are evaluated inside the
|
|
20
|
+
// sandbox at the start of each iteration, so the agent always sees fresh data.
|
|
21
|
+
promptFile: "./.sandcastle/prompt.md",
|
|
22
|
+
|
|
23
|
+
// Maximum number of iterations (agent invocations) to run in a session.
|
|
24
|
+
// Each iteration works on a single issue. Increase this to process more issues
|
|
25
|
+
// per run, or set it to 1 for a single-shot mode.
|
|
26
|
+
maxIterations: 3,
|
|
27
|
+
|
|
28
|
+
// Branch strategy — merge-to-head creates a temporary branch for the agent
|
|
29
|
+
// to work on, then merges the result back to HEAD when the run completes.
|
|
30
|
+
// This is required when using copyToWorktree, since head mode bind-mounts
|
|
31
|
+
// the host directory directly (no worktree to copy into).
|
|
32
|
+
branchStrategy: { type: "merge-to-head" },
|
|
33
|
+
|
|
34
|
+
// Copy node_modules from the host into the worktree before the sandbox
|
|
35
|
+
// starts. This avoids a full npm install from scratch on every iteration.
|
|
36
|
+
// The onSandboxReady hook still runs npm install as a safety net to handle
|
|
37
|
+
// platform-specific binaries and any packages added since the last copy.
|
|
38
|
+
copyToWorktree: ["node_modules"],
|
|
39
|
+
|
|
40
|
+
// Lifecycle hooks — commands grouped by where they run (host or sandbox).
|
|
41
|
+
hooks: {
|
|
42
|
+
sandbox: {
|
|
43
|
+
// onSandboxReady runs once after the sandbox is initialised and the repo is
|
|
44
|
+
// synced in, before the agent starts. Use it to install dependencies or run
|
|
45
|
+
// any other setup steps your project needs.
|
|
46
|
+
onSandboxReady: [{ command: "npm install" }],
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
});
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Context
|
|
2
|
+
|
|
3
|
+
## Open issues
|
|
4
|
+
|
|
5
|
+
!`{{LIST_TASKS_COMMAND}}`
|
|
6
|
+
|
|
7
|
+
The list above has already been filtered to issues ready for work and is the sole source of truth for what work exists. Do not run your own unfiltered query to find more issues — if the list is empty, there is nothing to do.
|
|
8
|
+
|
|
9
|
+
## Recent RALPH commits (last 10)
|
|
10
|
+
|
|
11
|
+
!`git log --oneline --grep="RALPH" -10`
|
|
12
|
+
|
|
13
|
+
# Task
|
|
14
|
+
|
|
15
|
+
You are RALPH — an autonomous coding agent working through issues one at a time.
|
|
16
|
+
|
|
17
|
+
## Priority order
|
|
18
|
+
|
|
19
|
+
Work on issues in this order:
|
|
20
|
+
|
|
21
|
+
1. **Bug fixes** — broken behaviour affecting users
|
|
22
|
+
2. **Tracer bullets** — thin end-to-end slices that prove an approach works
|
|
23
|
+
3. **Polish** — improving existing functionality (error messages, UX, docs)
|
|
24
|
+
4. **Refactors** — internal cleanups with no user-visible change
|
|
25
|
+
|
|
26
|
+
Pick the highest-priority open issue that is not blocked by another open issue.
|
|
27
|
+
|
|
28
|
+
## Workflow
|
|
29
|
+
|
|
30
|
+
1. **Explore** — read the issue carefully. Pull in the parent PRD if referenced. Read the relevant source files and tests before writing any code.
|
|
31
|
+
2. **Plan** — decide what to change and why. Keep the change as small as possible.
|
|
32
|
+
3. **Execute** — use RGR (Red → Green → Repeat → Refactor): write a failing test first, then write the implementation to pass it.
|
|
33
|
+
4. **Verify** — run `npm run typecheck` and `npm run test` before committing. Fix any failures before proceeding.
|
|
34
|
+
5. **Commit** — make a single git commit. The message MUST:
|
|
35
|
+
- Start with `RALPH:` prefix
|
|
36
|
+
- Include the task completed and any PRD reference
|
|
37
|
+
- List key decisions made
|
|
38
|
+
- List files changed
|
|
39
|
+
- Note any blockers for the next iteration
|
|
40
|
+
6. **Close** — close the issue with `{{CLOSE_TASK_COMMAND}}` explaining what was done.
|
|
41
|
+
|
|
42
|
+
## Rules
|
|
43
|
+
|
|
44
|
+
- Work on **one issue per iteration**. Do not attempt multiple issues in a single iteration.
|
|
45
|
+
- Do not close an issue until you have committed the fix and verified tests pass.
|
|
46
|
+
- Do not leave commented-out code or TODO comments in committed code.
|
|
47
|
+
- If you are blocked (missing context, failing tests you cannot fix, external dependency), leave a comment on the issue and move on — do not close it.
|
|
48
|
+
|
|
49
|
+
# Done
|
|
50
|
+
|
|
51
|
+
When all actionable issues are complete (or you are blocked on all remaining ones), or the open-issues block at the top of this prompt is empty, output the completion signal:
|
|
52
|
+
|
|
53
|
+
<promise>COMPLETE</promise>
|
package/package.json
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@rockclaver/sandcastle",
|
|
3
|
+
"version": "0.7.0",
|
|
4
|
+
"description": "CLI for orchestrating AI agents in isolated sandbox environments",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
},
|
|
13
|
+
"./sandboxes/docker": {
|
|
14
|
+
"import": "./dist/sandboxes/docker.js",
|
|
15
|
+
"types": "./dist/sandboxes/docker.d.ts"
|
|
16
|
+
},
|
|
17
|
+
"./sandboxes/vercel": {
|
|
18
|
+
"import": "./dist/sandboxes/vercel.js",
|
|
19
|
+
"types": "./dist/sandboxes/vercel.d.ts"
|
|
20
|
+
},
|
|
21
|
+
"./sandboxes/podman": {
|
|
22
|
+
"import": "./dist/sandboxes/podman.js",
|
|
23
|
+
"types": "./dist/sandboxes/podman.d.ts"
|
|
24
|
+
},
|
|
25
|
+
"./sandboxes/daytona": {
|
|
26
|
+
"import": "./dist/sandboxes/daytona.js",
|
|
27
|
+
"types": "./dist/sandboxes/daytona.d.ts"
|
|
28
|
+
},
|
|
29
|
+
"./sandboxes/no-sandbox": {
|
|
30
|
+
"import": "./dist/sandboxes/no-sandbox.js",
|
|
31
|
+
"types": "./dist/sandboxes/no-sandbox.d.ts"
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"bin": {
|
|
35
|
+
"sandcastle": "dist/main.js"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {
|
|
38
|
+
"build": "tsup",
|
|
39
|
+
"postbuild": "rm -rf dist/templates && cp -r src/templates dist/templates && node scripts/check-public-types-effect-free.mjs",
|
|
40
|
+
"pretest": "npm run build",
|
|
41
|
+
"test": "vitest run",
|
|
42
|
+
"test:watch": "vitest",
|
|
43
|
+
"typecheck": "tsgo --noEmit",
|
|
44
|
+
"format": "prettier --write .",
|
|
45
|
+
"format:check": "prettier --check .",
|
|
46
|
+
"prepare": "husky",
|
|
47
|
+
"release": "changeset publish",
|
|
48
|
+
"sandcastle": "npm run build && tsx .sandcastle/run.ts",
|
|
49
|
+
"test-podman": "npm run build && tsx --env-file=.sandcastle/.env .sandcastle/test-podman.ts",
|
|
50
|
+
"test-vercel": "npm run build && tsx --env-file=.sandcastle/.env .sandcastle/test-vercel.ts",
|
|
51
|
+
"test-interactive": "npm run build && tsx --env-file=.sandcastle/.env .sandcastle/test-interactive.ts"
|
|
52
|
+
},
|
|
53
|
+
"keywords": [
|
|
54
|
+
"cli",
|
|
55
|
+
"sandbox",
|
|
56
|
+
"docker",
|
|
57
|
+
"ai",
|
|
58
|
+
"agent"
|
|
59
|
+
],
|
|
60
|
+
"packageManager": "npm@10.9.2",
|
|
61
|
+
"repository": {
|
|
62
|
+
"type": "git",
|
|
63
|
+
"url": "https://github.com/mattpocock/sandcastle"
|
|
64
|
+
},
|
|
65
|
+
"license": "MIT",
|
|
66
|
+
"devDependencies": {
|
|
67
|
+
"@changesets/cli": "^2.30.0",
|
|
68
|
+
"@daytona/sdk": "^0.164.0",
|
|
69
|
+
"@effect/cli": "^0.74.0",
|
|
70
|
+
"@effect/platform": "^0.95.0",
|
|
71
|
+
"@effect/platform-node": "^0.105.0",
|
|
72
|
+
"@effect/printer": "^0.48.0",
|
|
73
|
+
"@effect/printer-ansi": "^0.48.0",
|
|
74
|
+
"@types/node": "^25.5.0",
|
|
75
|
+
"@typescript/native-preview": "^7.0.0-dev.20260317.1",
|
|
76
|
+
"effect": "^3.20.0",
|
|
77
|
+
"husky": "^9.1.7",
|
|
78
|
+
"lint-staged": "^15.5.1",
|
|
79
|
+
"prettier": "^3.5.3",
|
|
80
|
+
"tsup": "^8.5.1",
|
|
81
|
+
"tsx": "^4.21.0",
|
|
82
|
+
"typescript": "^6.0.3",
|
|
83
|
+
"vitest": "^3.2.0",
|
|
84
|
+
"zod": "^4.4.3"
|
|
85
|
+
},
|
|
86
|
+
"dependencies": {
|
|
87
|
+
"@clack/prompts": "^1.1.0"
|
|
88
|
+
},
|
|
89
|
+
"peerDependencies": {
|
|
90
|
+
"@daytona/sdk": "^0.164.0",
|
|
91
|
+
"@vercel/sandbox": ">=1.0.0"
|
|
92
|
+
},
|
|
93
|
+
"peerDependenciesMeta": {
|
|
94
|
+
"@vercel/sandbox": {
|
|
95
|
+
"optional": true
|
|
96
|
+
},
|
|
97
|
+
"@daytona/sdk": {
|
|
98
|
+
"optional": true
|
|
99
|
+
}
|
|
100
|
+
},
|
|
101
|
+
"files": [
|
|
102
|
+
"dist"
|
|
103
|
+
]
|
|
104
|
+
}
|