@rockclaver/sandcastle 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -36
- package/dist/index.d.ts +1 -1
- package/dist/index.js.map +1 -1
- package/dist/main.js +275 -3
- package/dist/main.js.map +1 -1
- package/dist/sandboxes/daytona.d.ts +1 -1
- package/dist/sandboxes/daytona.js.map +1 -1
- package/dist/templates/blank/main.mts +2 -2
- package/dist/templates/parallel-planner/main.mts +2 -2
- package/dist/templates/parallel-planner-with-review/main.mts +2 -2
- package/dist/templates/sequential-reviewer/main.mts +2 -2
- package/dist/templates/simple-loop/main.mts +2 -2
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -30,13 +30,13 @@ Sandcastle is provider-agnostic — it ships with built-in providers for Docker,
|
|
|
30
30
|
1. Install the package:
|
|
31
31
|
|
|
32
32
|
```bash
|
|
33
|
-
npm install --save-dev @
|
|
33
|
+
npm install --save-dev @rockclaver/sandcastle
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
2. Run `npx @
|
|
36
|
+
2. Run `npx @rockclaver/sandcastle init`. This scaffolds a `.sandcastle` directory with all the files needed.
|
|
37
37
|
|
|
38
38
|
```bash
|
|
39
|
-
npx @
|
|
39
|
+
npx @rockclaver/sandcastle init
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`. If you want to use your Claude subscription instead of an API key, see [#191](https://github.com/mattpocock/sandcastle/issues/191).
|
|
@@ -53,8 +53,8 @@ npx tsx .sandcastle/main.ts
|
|
|
53
53
|
|
|
54
54
|
```typescript
|
|
55
55
|
// 3. Run the agent via the JS API
|
|
56
|
-
import { run, claudeCode } from "@
|
|
57
|
-
import { docker } from "@
|
|
56
|
+
import { run, claudeCode } from "@rockclaver/sandcastle";
|
|
57
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
58
58
|
|
|
59
59
|
await run({
|
|
60
60
|
agent: claudeCode("claude-opus-4-7"),
|
|
@@ -67,20 +67,20 @@ await run({
|
|
|
67
67
|
|
|
68
68
|
Sandcastle uses a `SandboxProvider` to create isolated environments. The `sandbox` option on `run()`, `interactive()`, and `createSandbox()` accepts any provider, including `noSandbox()` — opt in to running the agent directly on the host when container isolation is undesired. Built-in providers:
|
|
69
69
|
|
|
70
|
-
| Provider | Import path
|
|
71
|
-
| ---------- |
|
|
72
|
-
| Docker | `@
|
|
73
|
-
| Podman | `@
|
|
74
|
-
| Vercel | `@
|
|
75
|
-
| No-sandbox | `@
|
|
70
|
+
| Provider | Import path | Type | Accepted by |
|
|
71
|
+
| ---------- | --------------------------------------------- | ---------- | ------------------------------------------- |
|
|
72
|
+
| Docker | `@rockclaver/sandcastle/sandboxes/docker` | Bind-mount | `run()`, `createSandbox()`, `interactive()` |
|
|
73
|
+
| Podman | `@rockclaver/sandcastle/sandboxes/podman` | Bind-mount | `run()`, `createSandbox()`, `interactive()` |
|
|
74
|
+
| Vercel | `@rockclaver/sandcastle/sandboxes/vercel` | Isolated | `run()`, `createSandbox()`, `interactive()` |
|
|
75
|
+
| No-sandbox | `@rockclaver/sandcastle/sandboxes/no-sandbox` | None | `run()`, `createSandbox()`, `interactive()` |
|
|
76
76
|
|
|
77
77
|
Worktree methods (`wt.run()`, `wt.interactive()`, `wt.createSandbox()`) accept the same providers as their top-level counterparts. `wt.interactive()` defaults to `noSandbox()` when no sandbox is specified.
|
|
78
78
|
|
|
79
79
|
```typescript
|
|
80
|
-
import { docker } from "@
|
|
81
|
-
import { podman } from "@
|
|
82
|
-
import { vercel } from "@
|
|
83
|
-
import { noSandbox } from "@
|
|
80
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
81
|
+
import { podman } from "@rockclaver/sandcastle/sandboxes/podman";
|
|
82
|
+
import { vercel } from "@rockclaver/sandcastle/sandboxes/vercel";
|
|
83
|
+
import { noSandbox } from "@rockclaver/sandcastle/sandboxes/no-sandbox";
|
|
84
84
|
|
|
85
85
|
// Docker, Podman, and Vercel are interchangeable in run() and createSandbox():
|
|
86
86
|
await run({
|
|
@@ -106,8 +106,8 @@ You can also [create your own provider](#custom-sandbox-providers) using `create
|
|
|
106
106
|
Sandcastle exports a programmatic `run()` function for use in scripts, CI pipelines, or custom tooling. The examples below use `docker()`, but any `SandboxProvider` works in its place.
|
|
107
107
|
|
|
108
108
|
```typescript
|
|
109
|
-
import { run, claudeCode } from "@
|
|
110
|
-
import { docker } from "@
|
|
109
|
+
import { run, claudeCode } from "@rockclaver/sandcastle";
|
|
110
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
111
111
|
|
|
112
112
|
const result = await run({
|
|
113
113
|
agent: claudeCode("claude-opus-4-7"),
|
|
@@ -124,8 +124,8 @@ console.log(result.branch); // target branch name
|
|
|
124
124
|
### All options
|
|
125
125
|
|
|
126
126
|
```typescript
|
|
127
|
-
import { run, claudeCode } from "@
|
|
128
|
-
import { docker } from "@
|
|
127
|
+
import { run, claudeCode } from "@rockclaver/sandcastle";
|
|
128
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
129
129
|
|
|
130
130
|
const result = await run({
|
|
131
131
|
// Agent provider — required. Pass a model string to claudeCode().
|
|
@@ -261,8 +261,8 @@ Use `run()` instead when you only need a single one-shot invocation — it handl
|
|
|
261
261
|
#### Basic single-run usage
|
|
262
262
|
|
|
263
263
|
```typescript
|
|
264
|
-
import { createSandbox, claudeCode } from "@
|
|
265
|
-
import { docker } from "@
|
|
264
|
+
import { createSandbox, claudeCode } from "@rockclaver/sandcastle";
|
|
265
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
266
266
|
|
|
267
267
|
await using sandbox = await createSandbox({
|
|
268
268
|
branch: "agent/fix-42",
|
|
@@ -280,8 +280,8 @@ console.log(result.commits); // [{ sha: "abc123" }]
|
|
|
280
280
|
#### Multi-run implement-then-review
|
|
281
281
|
|
|
282
282
|
```typescript
|
|
283
|
-
import { createSandbox, claudeCode } from "@
|
|
284
|
-
import { docker } from "@
|
|
283
|
+
import { createSandbox, claudeCode } from "@rockclaver/sandcastle";
|
|
284
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
285
285
|
|
|
286
286
|
await using sandbox = await createSandbox({
|
|
287
287
|
branch: "agent/fix-42",
|
|
@@ -386,7 +386,7 @@ Only `branch` and `merge-to-head` strategies are accepted; `head` is a compile-t
|
|
|
386
386
|
Pass `cwd` to target a repo other than `process.cwd()`. Relative paths resolve against `process.cwd()`; absolute paths pass through. A `CwdError` is thrown if the path does not exist or is not a directory.
|
|
387
387
|
|
|
388
388
|
```typescript
|
|
389
|
-
import { createWorktree } from "@
|
|
389
|
+
import { createWorktree } from "@rockclaver/sandcastle";
|
|
390
390
|
|
|
391
391
|
await using wt = await createWorktree({
|
|
392
392
|
branchStrategy: { type: "branch", branch: "agent/fix-42" },
|
|
@@ -413,7 +413,7 @@ const result = await wt.run({
|
|
|
413
413
|
console.log(result.commits); // commits made during the run
|
|
414
414
|
|
|
415
415
|
// Create a long-lived sandbox from the worktree
|
|
416
|
-
import { docker } from "@
|
|
416
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
417
417
|
|
|
418
418
|
await using sandbox = await wt.createSandbox({
|
|
419
419
|
sandbox: docker(),
|
|
@@ -558,7 +558,7 @@ If any command exits with a non-zero code, the run fails immediately with an err
|
|
|
558
558
|
Use `{{KEY}}` placeholders in your prompt to inject values from the `promptArgs` option. This is useful for reusing the same prompt file across multiple runs with different parameters.
|
|
559
559
|
|
|
560
560
|
```typescript
|
|
561
|
-
import { run } from "@
|
|
561
|
+
import { run } from "@rockclaver/sandcastle";
|
|
562
562
|
|
|
563
563
|
await run({
|
|
564
564
|
promptFile: "./my-prompt.md",
|
|
@@ -646,8 +646,8 @@ This is independent of `idleTimeoutSeconds`. They cover different phases: `idleT
|
|
|
646
646
|
Use `Output.object()` to extract a typed, schema-validated JSON payload from the agent's stdout. The agent emits its answer inside an XML tag you specify, and Sandcastle parses, validates, and returns it on `result.output`. The schema can be any [Standard Schema](https://standardschema.dev) validator — the examples below use [Zod](https://zod.dev), but Valibot, ArkType, and others work identically. See [ADR 0010](docs/adr/0010-structured-output.md) for design rationale.
|
|
647
647
|
|
|
648
648
|
```ts
|
|
649
|
-
import { run, Output, claudeCode } from "@
|
|
650
|
-
import { docker } from "@
|
|
649
|
+
import { run, Output, claudeCode } from "@rockclaver/sandcastle";
|
|
650
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
651
651
|
import { z } from "zod";
|
|
652
652
|
|
|
653
653
|
const result = await run({
|
|
@@ -672,7 +672,7 @@ console.log(result.output.score); // typed as number
|
|
|
672
672
|
When extraction or validation fails, `run()` throws a `StructuredOutputError`. Alongside `tag`, `rawMatched`, `cause`, `commits`, `branch`, and `preservedWorktreePath`, the error carries the `sessionId` (and `sessionFilePath`, when the session was captured) of the run that produced the bad output. You can resume that session to ask the agent to re-emit corrected output, without repeating the work:
|
|
673
673
|
|
|
674
674
|
```ts
|
|
675
|
-
import { run, Output, StructuredOutputError } from "@
|
|
675
|
+
import { run, Output, StructuredOutputError } from "@rockclaver/sandcastle";
|
|
676
676
|
|
|
677
677
|
try {
|
|
678
678
|
return await run({ ...opts, output });
|
|
@@ -689,9 +689,40 @@ try {
|
|
|
689
689
|
}
|
|
690
690
|
```
|
|
691
691
|
|
|
692
|
+
### Profiles
|
|
693
|
+
|
|
694
|
+
A **profile** describes the language/stack of the repo Sandcastle is operating on, so the scaffolded prompts and `main` setup point your agent at the right toolchain instead of assuming npm. Profiles are an internal registry shipped with Sandcastle — in v1 they are not user-defined config, and selecting one does **not** install, pin, or manage any SDK. The built-in profiles are `js-ts`, `flutter`, `dart`, and `go`.
|
|
695
|
+
|
|
696
|
+
Select one or more profiles during `sandcastle init`. The interactive prompt is a multi-select with `js-ts` selected by default; non-interactively, pass a comma-separated `--profile` flag:
|
|
697
|
+
|
|
698
|
+
```bash
|
|
699
|
+
# A Flutter app with a Go backend
|
|
700
|
+
npx @rockclaver/sandcastle init --profile flutter,go
|
|
701
|
+
|
|
702
|
+
# JS/TS only (the default when --profile is omitted)
|
|
703
|
+
npx @rockclaver/sandcastle init --profile js-ts
|
|
704
|
+
```
|
|
705
|
+
|
|
706
|
+
Profile names are de-duplicated while preserving first-occurrence order, and an unknown name fails fast with an error listing the valid profiles.
|
|
707
|
+
|
|
708
|
+
**Generated guidance files.** Each selected profile scaffolds a guidance markdown file plus a metadata file under `.sandcastle/profiles/`:
|
|
709
|
+
|
|
710
|
+
```
|
|
711
|
+
.sandcastle/profiles/
|
|
712
|
+
├── profiles.json # Metadata: selected profiles + the path to each guidance file
|
|
713
|
+
├── flutter.md # Per-profile guidance (one per selected profile)
|
|
714
|
+
└── go.md
|
|
715
|
+
```
|
|
716
|
+
|
|
717
|
+
Each `<profile>.md` describes the stack and lists suggested **setup** and **validation** commands (e.g. `flutter pub get` / `flutter analyze` / `flutter test` for Flutter, `go build ./...` / `go vet ./...` / `go test ./...` for Go). `profiles.json` lets templates and tooling discover which profiles were selected and where their guidance lives.
|
|
718
|
+
|
|
719
|
+
**How agents use them.** Every scaffolded prompt gains a "Project profiles" section that links the generated guidance files and instructs the agent to read each one and follow its setup and validation commands rather than assuming a fixed toolchain. When no JS/TS profile is selected, the generated `main` setup hook runs the primary profile's setup command (e.g. `flutter pub get`, `go mod download`) instead of `npm install`. The commands in the guidance files are advisory, not a contract — the agent should adapt them to the project's actual scripts.
|
|
720
|
+
|
|
721
|
+
**Out of scope.** Profiles are guidance only. Sandcastle does **not** install Flutter, Dart, or Go; does **not** pin or manage SDK versions; and assumes the relevant toolchain is already available in the sandbox image. Pin or install SDKs yourself by editing the scaffolded `Dockerfile`/`Containerfile`.
|
|
722
|
+
|
|
692
723
|
### Templates
|
|
693
724
|
|
|
694
|
-
`sandcastle init` prompts you to choose a sandbox provider (Docker or Podman), an issue tracker (GitHub Issues, Beads, or Custom), and a template, which scaffolds a ready-to-use prompt and `main.mts` suited to a specific workflow. If your project's `package.json` has `"type": "module"`, the file will be named `main.ts` instead. Choosing **Custom** scaffolds the project in a deliberately broken-until-configured state plus a `.sandcastle/SETUP_ISSUE_TRACKER.md` prompt you feed to your coding agent, which wires up your own tracker by editing the scaffolded files in place. Five templates are available:
|
|
725
|
+
`sandcastle init` prompts you to choose project profiles, a sandbox provider (Docker or Podman), an issue tracker (GitHub Issues, Beads, or Custom), and a template, which scaffolds a ready-to-use prompt and `main.mts` suited to a specific workflow. If your project's `package.json` has `"type": "module"`, the file will be named `main.ts` instead. The scaffolded prompts reference your selected profile guidance under `.sandcastle/profiles/` instead of assuming a fixed toolchain, and when no JS/TS profile is selected the generated `main` setup hook uses the primary selected profile's setup command (e.g. `flutter pub get`, `go mod download`) rather than `npm install`. Choosing **Custom** scaffolds the project in a deliberately broken-until-configured state plus a `.sandcastle/SETUP_ISSUE_TRACKER.md` prompt you feed to your coding agent, which wires up your own tracker by editing the scaffolded files in place. Five templates are available:
|
|
695
726
|
|
|
696
727
|
| Template | Description |
|
|
697
728
|
| ------------------------------ | ------------------------------------------------------------------------- |
|
|
@@ -713,11 +744,14 @@ Init detects your host package manager (npm, pnpm, yarn, or bun) from a `package
|
|
|
713
744
|
|
|
714
745
|
Every interactive prompt has a paired `--flag` so the entire init can run non-interactively (e.g. in CI or a scripted setup). When stdin is not a TTY and a required flag is missing, init fails fast with a clear error rather than wedging on a prompt.
|
|
715
746
|
|
|
747
|
+
Init checks common repository signals against the selected profiles and prints a warning, without failing, when they do not appear to match. For example, selecting `--profile go` in a repo with only `package.json` warns but still scaffolds, so monorepos and custom layouts can continue.
|
|
748
|
+
|
|
716
749
|
| Option | Required | Default | Description |
|
|
717
750
|
| ------------------------- | -------- | ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
718
751
|
| `--image-name` | No | `sandcastle:<repo-dir-name>` | Docker image name |
|
|
719
752
|
| `--agent` | No | Interactive multi-select | One or more agents, comma-separated (e.g. `claude-code,codex`). The first is the generated `agent()` default. Valid: `claude-code`, `pi`, `codex`, `cursor`, `opencode`, `copilot` |
|
|
720
753
|
| `--model` | No | Agent's default model | Pre-fills `AGENT_MODEL` in `.env.example` (e.g. `claude-sonnet-4-6`). Left commented out when unset |
|
|
754
|
+
| `--profile` | No | Interactive multi-select | One or more project profiles, comma-separated (e.g. `js-ts,go`). Non-interactive init defaults to `js-ts` when unset. Valid: `js-ts`, `flutter`, `dart`, `go` |
|
|
721
755
|
| `--sandbox` | No | Interactive prompt | Sandbox provider to use (`docker`, `podman`) |
|
|
722
756
|
| `--template` | No | Interactive prompt | Template to scaffold (e.g. `blank`, `simple-loop`) |
|
|
723
757
|
| `--issue-tracker` | No | Interactive prompt | Issue tracker to use (`github-issues`, `beads`, `custom`) |
|
|
@@ -731,6 +765,7 @@ Creates the following files:
|
|
|
731
765
|
.sandcastle/
|
|
732
766
|
├── Dockerfile # Sandbox environment (customize as needed)
|
|
733
767
|
├── prompt.md # Agent instructions
|
|
768
|
+
├── profiles/ # Selected profile metadata and guidance
|
|
734
769
|
├── .env.example # Token placeholders
|
|
735
770
|
└── .gitignore # Ignores .env, logs/
|
|
736
771
|
```
|
|
@@ -898,7 +933,7 @@ const [reviewA, reviewB] = await Promise.all([
|
|
|
898
933
|
Use `agent()` when the provider should be selected at runtime rather than hard-coded in `main.ts`. The resolver reads `AGENT` to choose a provider, reads `AGENT_MODEL` to override that provider's default model, and falls back to `default` when `AGENT` is unset:
|
|
899
934
|
|
|
900
935
|
```typescript
|
|
901
|
-
import { agent, run } from "@
|
|
936
|
+
import { agent, run } from "@rockclaver/sandcastle";
|
|
902
937
|
|
|
903
938
|
await run({
|
|
904
939
|
agent: agent({
|
|
@@ -1019,7 +1054,7 @@ import {
|
|
|
1019
1054
|
type BindMountCreateOptions,
|
|
1020
1055
|
type BindMountSandboxHandle,
|
|
1021
1056
|
type ExecResult,
|
|
1022
|
-
} from "@
|
|
1057
|
+
} from "@rockclaver/sandcastle";
|
|
1023
1058
|
import { execFile, spawn } from "node:child_process";
|
|
1024
1059
|
import { copyFile as fsCopyFile, mkdir as fsMkdir } from "node:fs/promises";
|
|
1025
1060
|
import { dirname } from "node:path";
|
|
@@ -1119,7 +1154,7 @@ import {
|
|
|
1119
1154
|
createIsolatedSandboxProvider,
|
|
1120
1155
|
type IsolatedSandboxHandle,
|
|
1121
1156
|
type ExecResult,
|
|
1122
|
-
} from "@
|
|
1157
|
+
} from "@rockclaver/sandcastle";
|
|
1123
1158
|
import { execFile, spawn } from "node:child_process";
|
|
1124
1159
|
import { copyFile, mkdir, mkdtemp, rm } from "node:fs/promises";
|
|
1125
1160
|
import { tmpdir } from "node:os";
|
|
@@ -1235,8 +1270,8 @@ A branch strategy controls where the agent's commits land. Configure it when con
|
|
|
1235
1270
|
Branch strategy is now configured on `run()`, not on the provider:
|
|
1236
1271
|
|
|
1237
1272
|
```typescript
|
|
1238
|
-
import { run, claudeCode } from "@
|
|
1239
|
-
import { docker } from "@
|
|
1273
|
+
import { run, claudeCode } from "@rockclaver/sandcastle";
|
|
1274
|
+
import { docker } from "@rockclaver/sandcastle/sandboxes/docker";
|
|
1240
1275
|
|
|
1241
1276
|
// head — direct write, bind-mount only (default for bind-mount providers)
|
|
1242
1277
|
await run({
|
|
@@ -1264,7 +1299,7 @@ await run({
|
|
|
1264
1299
|
Pass your custom provider via the `sandbox` option — it works the same as the built-in `docker()` provider:
|
|
1265
1300
|
|
|
1266
1301
|
```typescript
|
|
1267
|
-
import { run, claudeCode } from "@
|
|
1302
|
+
import { run, claudeCode } from "@rockclaver/sandcastle";
|
|
1268
1303
|
|
|
1269
1304
|
const result = await run({
|
|
1270
1305
|
agent: claudeCode("claude-opus-4-7"),
|
package/dist/index.d.ts
CHANGED
|
@@ -320,7 +320,7 @@ type OutputDefinition = OutputObjectDefinition<any> | OutputStringDefinition;
|
|
|
320
320
|
* Helpers for declaring structured output on `run()`.
|
|
321
321
|
*
|
|
322
322
|
* ```ts
|
|
323
|
-
* import { Output, run } from "@
|
|
323
|
+
* import { Output, run } from "@rockclaver/sandcastle";
|
|
324
324
|
* import { z } from "zod";
|
|
325
325
|
*
|
|
326
326
|
* const result = await run({
|