cclaw-cli 0.48.35 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -82
- package/dist/artifact-linter.d.ts +4 -0
- package/dist/artifact-linter.js +24 -3
- package/dist/cli.d.ts +1 -19
- package/dist/cli.js +49 -495
- package/dist/constants.d.ts +2 -13
- package/dist/constants.js +1 -46
- package/dist/content/closeout-guidance.d.ts +14 -0
- package/dist/content/closeout-guidance.js +42 -0
- package/dist/content/core-agents.js +51 -9
- package/dist/content/decision-protocol.d.ts +12 -0
- package/dist/content/decision-protocol.js +20 -0
- package/dist/content/diff-command.d.ts +1 -2
- package/dist/content/diff-command.js +8 -94
- package/dist/content/examples.d.ts +4 -10
- package/dist/content/examples.js +10 -20
- package/dist/content/hook-events.js +2 -2
- package/dist/content/hook-inline-snippets.d.ts +5 -2
- package/dist/content/hook-inline-snippets.js +33 -1
- package/dist/content/hook-manifest.d.ts +3 -4
- package/dist/content/hook-manifest.js +11 -12
- package/dist/content/hooks.js +2 -0
- package/dist/content/ideate-command.d.ts +2 -0
- package/dist/content/ideate-command.js +31 -25
- package/dist/content/iron-laws.d.ts +5 -5
- package/dist/content/iron-laws.js +5 -5
- package/dist/content/learnings.d.ts +3 -4
- package/dist/content/learnings.js +24 -50
- package/dist/content/meta-skill.js +31 -24
- package/dist/content/next-command.js +38 -38
- package/dist/content/node-hooks.js +17 -343
- package/dist/content/opencode-plugin.js +2 -100
- package/dist/content/research-playbooks.js +14 -14
- package/dist/content/review-loop.d.ts +2 -0
- package/dist/content/review-loop.js +8 -0
- package/dist/content/session-hooks.js +14 -46
- package/dist/content/skills.d.ts +0 -5
- package/dist/content/skills.js +53 -128
- package/dist/content/stage-common-guidance.d.ts +0 -1
- package/dist/content/stage-common-guidance.js +15 -14
- package/dist/content/stage-schema.d.ts +26 -1
- package/dist/content/stage-schema.js +121 -40
- package/dist/content/stages/_lint-metadata/index.js +9 -15
- package/dist/content/stages/brainstorm.js +22 -43
- package/dist/content/stages/design.js +37 -57
- package/dist/content/stages/plan.js +22 -13
- package/dist/content/stages/review.js +24 -27
- package/dist/content/stages/scope.js +34 -46
- package/dist/content/stages/ship.js +7 -4
- package/dist/content/stages/spec.js +20 -9
- package/dist/content/stages/tdd.js +64 -44
- package/dist/content/start-command.js +10 -12
- package/dist/content/status-command.d.ts +2 -7
- package/dist/content/status-command.js +19 -146
- package/dist/content/subagents.d.ts +0 -5
- package/dist/content/subagents.js +47 -28
- package/dist/content/templates.d.ts +1 -1
- package/dist/content/templates.js +126 -135
- package/dist/content/track-render-context.d.ts +17 -0
- package/dist/content/track-render-context.js +44 -0
- package/dist/content/tree-command.d.ts +1 -2
- package/dist/content/tree-command.js +4 -87
- package/dist/content/utility-skills.d.ts +2 -29
- package/dist/content/utility-skills.js +2 -1533
- package/dist/content/view-command.js +29 -11
- package/dist/delegation.d.ts +1 -1
- package/dist/delegation.js +5 -15
- package/dist/doctor-registry.js +20 -21
- package/dist/doctor.js +88 -408
- package/dist/flow-state.d.ts +3 -0
- package/dist/flow-state.js +2 -0
- package/dist/harness-adapters.d.ts +1 -1
- package/dist/harness-adapters.js +48 -57
- package/dist/install.js +128 -520
- package/dist/internal/advance-stage.js +3 -9
- package/dist/internal/compound-readiness.d.ts +1 -1
- package/dist/internal/compound-readiness.js +1 -1
- package/dist/internal/tdd-loop-status.d.ts +1 -1
- package/dist/internal/tdd-loop-status.js +1 -1
- package/dist/knowledge-store.d.ts +16 -10
- package/dist/knowledge-store.js +51 -15
- package/dist/policy.js +16 -109
- package/dist/run-archive.d.ts +4 -6
- package/dist/run-archive.js +15 -20
- package/dist/run-persistence.d.ts +2 -2
- package/dist/run-persistence.js +3 -9
- package/package.json +1 -2
- package/dist/content/archive-command.d.ts +0 -2
- package/dist/content/archive-command.js +0 -124
- package/dist/content/compound-command.d.ts +0 -5
- package/dist/content/compound-command.js +0 -193
- package/dist/content/contexts.d.ts +0 -9
- package/dist/content/contexts.js +0 -65
- package/dist/content/contracts.d.ts +0 -2
- package/dist/content/contracts.js +0 -51
- package/dist/content/doctor-references.d.ts +0 -2
- package/dist/content/doctor-references.js +0 -150
- package/dist/content/eval-scaffold.d.ts +0 -15
- package/dist/content/eval-scaffold.js +0 -370
- package/dist/content/feature-command.d.ts +0 -2
- package/dist/content/feature-command.js +0 -123
- package/dist/content/flow-map.d.ts +0 -23
- package/dist/content/flow-map.js +0 -134
- package/dist/content/harness-doc.d.ts +0 -2
- package/dist/content/harness-doc.js +0 -202
- package/dist/content/harness-playbooks.d.ts +0 -24
- package/dist/content/harness-playbooks.js +0 -393
- package/dist/content/harness-tool-refs.d.ts +0 -20
- package/dist/content/harness-tool-refs.js +0 -268
- package/dist/content/ops-command.d.ts +0 -2
- package/dist/content/ops-command.js +0 -71
- package/dist/content/protocols.d.ts +0 -7
- package/dist/content/protocols.js +0 -215
- package/dist/content/retro-command.d.ts +0 -2
- package/dist/content/retro-command.js +0 -165
- package/dist/content/rewind-command.d.ts +0 -2
- package/dist/content/rewind-command.js +0 -106
- package/dist/content/tdd-log-command.d.ts +0 -2
- package/dist/content/tdd-log-command.js +0 -85
- package/dist/eval/agents/single-shot.d.ts +0 -27
- package/dist/eval/agents/single-shot.js +0 -79
- package/dist/eval/agents/with-tools.d.ts +0 -44
- package/dist/eval/agents/with-tools.js +0 -261
- package/dist/eval/agents/workflow.d.ts +0 -31
- package/dist/eval/agents/workflow.js +0 -155
- package/dist/eval/baseline.d.ts +0 -38
- package/dist/eval/baseline.js +0 -282
- package/dist/eval/config-loader.d.ts +0 -14
- package/dist/eval/config-loader.js +0 -395
- package/dist/eval/corpus.d.ts +0 -30
- package/dist/eval/corpus.js +0 -330
- package/dist/eval/cost-guard.d.ts +0 -102
- package/dist/eval/cost-guard.js +0 -190
- package/dist/eval/diff.d.ts +0 -64
- package/dist/eval/diff.js +0 -323
- package/dist/eval/llm-client.d.ts +0 -176
- package/dist/eval/llm-client.js +0 -267
- package/dist/eval/mode.d.ts +0 -28
- package/dist/eval/mode.js +0 -61
- package/dist/eval/progress.d.ts +0 -83
- package/dist/eval/progress.js +0 -59
- package/dist/eval/report.d.ts +0 -11
- package/dist/eval/report.js +0 -181
- package/dist/eval/rubric-loader.d.ts +0 -20
- package/dist/eval/rubric-loader.js +0 -143
- package/dist/eval/runner.d.ts +0 -81
- package/dist/eval/runner.js +0 -746
- package/dist/eval/runs.d.ts +0 -41
- package/dist/eval/runs.js +0 -114
- package/dist/eval/sandbox.d.ts +0 -38
- package/dist/eval/sandbox.js +0 -137
- package/dist/eval/tools/glob.d.ts +0 -2
- package/dist/eval/tools/glob.js +0 -163
- package/dist/eval/tools/grep.d.ts +0 -2
- package/dist/eval/tools/grep.js +0 -152
- package/dist/eval/tools/index.d.ts +0 -7
- package/dist/eval/tools/index.js +0 -35
- package/dist/eval/tools/read.d.ts +0 -2
- package/dist/eval/tools/read.js +0 -122
- package/dist/eval/tools/types.d.ts +0 -49
- package/dist/eval/tools/types.js +0 -41
- package/dist/eval/tools/write.d.ts +0 -2
- package/dist/eval/tools/write.js +0 -92
- package/dist/eval/types.d.ts +0 -561
- package/dist/eval/types.js +0 -47
- package/dist/eval/verifiers/judge.d.ts +0 -40
- package/dist/eval/verifiers/judge.js +0 -256
- package/dist/eval/verifiers/rules.d.ts +0 -24
- package/dist/eval/verifiers/rules.js +0 -218
- package/dist/eval/verifiers/structural.d.ts +0 -14
- package/dist/eval/verifiers/structural.js +0 -171
- package/dist/eval/verifiers/traceability.d.ts +0 -23
- package/dist/eval/verifiers/traceability.js +0 -84
- package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
- package/dist/eval/verifiers/workflow-consistency.js +0 -225
- package/dist/eval/workflow-corpus.d.ts +0 -7
- package/dist/eval/workflow-corpus.js +0 -207
- package/dist/feature-system.d.ts +0 -42
- package/dist/feature-system.js +0 -432
- package/dist/internal/knowledge-digest.d.ts +0 -7
- package/dist/internal/knowledge-digest.js +0 -93
package/dist/eval/runs.d.ts
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
export declare const RUNS_DIR = "runs";
|
|
2
|
-
export interface EvalRunStatus {
|
|
3
|
-
id: string;
|
|
4
|
-
startedAt: string;
|
|
5
|
-
endedAt?: string;
|
|
6
|
-
pid: number;
|
|
7
|
-
argv: string[];
|
|
8
|
-
cwd: string;
|
|
9
|
-
exitCode?: number;
|
|
10
|
-
state: "running" | "succeeded" | "failed";
|
|
11
|
-
}
|
|
12
|
-
export declare function runsRoot(projectRoot: string): string;
|
|
13
|
-
export declare function runDir(projectRoot: string, id: string): string;
|
|
14
|
-
export declare function runLogPath(projectRoot: string, id: string): string;
|
|
15
|
-
export declare function runStatusPath(projectRoot: string, id: string): string;
|
|
16
|
-
/**
|
|
17
|
-
* Generate a short, lexicographically-sortable run id. The timestamp
|
|
18
|
-
* prefix means `ls -1` already returns the runs in chronological order
|
|
19
|
-
* which keeps the `runs list` subcommand trivial.
|
|
20
|
-
*/
|
|
21
|
-
export declare function generateRunId(now?: Date): string;
|
|
22
|
-
export declare function ensureRunDir(projectRoot: string, id: string): Promise<string>;
|
|
23
|
-
export declare function writeRunStatus(projectRoot: string, status: EvalRunStatus): Promise<void>;
|
|
24
|
-
export declare function readRunStatus(projectRoot: string, id: string): Promise<EvalRunStatus | null>;
|
|
25
|
-
/**
|
|
26
|
-
* List run ids under `.cclaw/evals/runs/`, most recent first. Directory
|
|
27
|
-
* entries that don't contain a `run.json` are skipped (half-initialized
|
|
28
|
-
* or manually mkdir'd folders).
|
|
29
|
-
*/
|
|
30
|
-
export declare function listRuns(projectRoot: string): Promise<EvalRunStatus[]>;
|
|
31
|
-
/**
|
|
32
|
-
* Resolve `"latest"` (or undefined) to the most recent run id.
|
|
33
|
-
* Returns `null` when there are no runs.
|
|
34
|
-
*/
|
|
35
|
-
export declare function resolveRunId(projectRoot: string, hint: string | undefined): Promise<string | null>;
|
|
36
|
-
/**
|
|
37
|
-
* Cheap liveness probe for an EvalRunStatus. A `run.json` can be stale
|
|
38
|
-
* (process crashed mid-commit), so we double-check with `kill(pid, 0)`
|
|
39
|
-
* before trusting the `state: "running"` field.
|
|
40
|
-
*/
|
|
41
|
-
export declare function isRunAlive(status: EvalRunStatus): boolean;
|
package/dist/eval/runs.js
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Run bookkeeping for backgrounded `cclaw eval` invocations.
|
|
3
|
-
*
|
|
4
|
-
* A backgrounded run writes three artifacts under `.cclaw/evals/runs/<id>/`:
|
|
5
|
-
*
|
|
6
|
-
* - `run.json` — status metadata (pid, started/ended ISO timestamps,
|
|
7
|
-
* exit code, argv, cwd). Updated at start and at exit.
|
|
8
|
-
* - `run.log` — combined stdout+stderr of the child process. This is
|
|
9
|
-
* what `cclaw eval runs tail` streams.
|
|
10
|
-
* - `run.pid` — just the pid, written atomically so `runs status`
|
|
11
|
-
* can probe liveness without parsing JSON.
|
|
12
|
-
*
|
|
13
|
-
* The `id` is a short alphanumeric string (8 chars + ISO timestamp prefix)
|
|
14
|
-
* chosen so sorting directory entries by name produces a chronological
|
|
15
|
-
* listing without any extra work.
|
|
16
|
-
*/
|
|
17
|
-
import { randomBytes } from "node:crypto";
|
|
18
|
-
import fs from "node:fs/promises";
|
|
19
|
-
import path from "node:path";
|
|
20
|
-
import { EVALS_ROOT } from "../constants.js";
|
|
21
|
-
import { exists } from "../fs-utils.js";
|
|
22
|
-
export const RUNS_DIR = "runs";
|
|
23
|
-
export function runsRoot(projectRoot) {
|
|
24
|
-
return path.join(projectRoot, EVALS_ROOT, RUNS_DIR);
|
|
25
|
-
}
|
|
26
|
-
export function runDir(projectRoot, id) {
|
|
27
|
-
return path.join(runsRoot(projectRoot), id);
|
|
28
|
-
}
|
|
29
|
-
export function runLogPath(projectRoot, id) {
|
|
30
|
-
return path.join(runDir(projectRoot, id), "run.log");
|
|
31
|
-
}
|
|
32
|
-
export function runStatusPath(projectRoot, id) {
|
|
33
|
-
return path.join(runDir(projectRoot, id), "run.json");
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* Generate a short, lexicographically-sortable run id. The timestamp
|
|
37
|
-
* prefix means `ls -1` already returns the runs in chronological order
|
|
38
|
-
* which keeps the `runs list` subcommand trivial.
|
|
39
|
-
*/
|
|
40
|
-
export function generateRunId(now = new Date()) {
|
|
41
|
-
const ts = now.toISOString().replace(/[-:]/g, "").replace(/\.\d+Z$/, "Z");
|
|
42
|
-
const suffix = randomBytes(3).toString("hex");
|
|
43
|
-
return `${ts}-${suffix}`;
|
|
44
|
-
}
|
|
45
|
-
export async function ensureRunDir(projectRoot, id) {
|
|
46
|
-
const dir = runDir(projectRoot, id);
|
|
47
|
-
await fs.mkdir(dir, { recursive: true });
|
|
48
|
-
return dir;
|
|
49
|
-
}
|
|
50
|
-
export async function writeRunStatus(projectRoot, status) {
|
|
51
|
-
await ensureRunDir(projectRoot, status.id);
|
|
52
|
-
await fs.writeFile(runStatusPath(projectRoot, status.id), `${JSON.stringify(status, null, 2)}\n`, "utf8");
|
|
53
|
-
}
|
|
54
|
-
export async function readRunStatus(projectRoot, id) {
|
|
55
|
-
const file = runStatusPath(projectRoot, id);
|
|
56
|
-
if (!(await exists(file)))
|
|
57
|
-
return null;
|
|
58
|
-
try {
|
|
59
|
-
const raw = await fs.readFile(file, "utf8");
|
|
60
|
-
return JSON.parse(raw);
|
|
61
|
-
}
|
|
62
|
-
catch {
|
|
63
|
-
return null;
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
/**
|
|
67
|
-
* List run ids under `.cclaw/evals/runs/`, most recent first. Directory
|
|
68
|
-
* entries that don't contain a `run.json` are skipped (half-initialized
|
|
69
|
-
* or manually mkdir'd folders).
|
|
70
|
-
*/
|
|
71
|
-
export async function listRuns(projectRoot) {
|
|
72
|
-
const root = runsRoot(projectRoot);
|
|
73
|
-
if (!(await exists(root)))
|
|
74
|
-
return [];
|
|
75
|
-
const entries = await fs.readdir(root, { withFileTypes: true });
|
|
76
|
-
const out = [];
|
|
77
|
-
for (const entry of entries) {
|
|
78
|
-
if (!entry.isDirectory())
|
|
79
|
-
continue;
|
|
80
|
-
const status = await readRunStatus(projectRoot, entry.name);
|
|
81
|
-
if (status)
|
|
82
|
-
out.push(status);
|
|
83
|
-
}
|
|
84
|
-
out.sort((a, b) => (a.startedAt < b.startedAt ? 1 : -1));
|
|
85
|
-
return out;
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Resolve `"latest"` (or undefined) to the most recent run id.
|
|
89
|
-
* Returns `null` when there are no runs.
|
|
90
|
-
*/
|
|
91
|
-
export async function resolveRunId(projectRoot, hint) {
|
|
92
|
-
if (hint && hint !== "latest") {
|
|
93
|
-
const status = await readRunStatus(projectRoot, hint);
|
|
94
|
-
return status ? hint : null;
|
|
95
|
-
}
|
|
96
|
-
const runs = await listRuns(projectRoot);
|
|
97
|
-
return runs[0]?.id ?? null;
|
|
98
|
-
}
|
|
99
|
-
/**
|
|
100
|
-
* Cheap liveness probe for an EvalRunStatus. A `run.json` can be stale
|
|
101
|
-
* (process crashed mid-commit), so we double-check with `kill(pid, 0)`
|
|
102
|
-
* before trusting the `state: "running"` field.
|
|
103
|
-
*/
|
|
104
|
-
export function isRunAlive(status) {
|
|
105
|
-
if (status.state !== "running")
|
|
106
|
-
return false;
|
|
107
|
-
try {
|
|
108
|
-
process.kill(status.pid, 0);
|
|
109
|
-
return true;
|
|
110
|
-
}
|
|
111
|
-
catch {
|
|
112
|
-
return false;
|
|
113
|
-
}
|
|
114
|
-
}
|
package/dist/eval/sandbox.d.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
export declare class SandboxEscapeError extends Error {
|
|
2
|
-
readonly requestedPath: string;
|
|
3
|
-
constructor(requestedPath: string, reason: string);
|
|
4
|
-
}
|
|
5
|
-
export interface SandboxOptions {
|
|
6
|
-
/** Project root that `contextFiles` are resolved against. */
|
|
7
|
-
projectRoot: string;
|
|
8
|
-
/** Case-relative paths to copy into the sandbox before the agent starts. */
|
|
9
|
-
contextFiles?: string[];
|
|
10
|
-
/**
|
|
11
|
-
* Base directory that will host the per-case tmpdir. Defaults to
|
|
12
|
-
* `os.tmpdir()`. Tests inject a repo-local path so CI leaves no
|
|
13
|
-
* traces in `/tmp` when assertions fail.
|
|
14
|
-
*/
|
|
15
|
-
baseDir?: string;
|
|
16
|
-
/** Override the per-case suffix. Primarily for deterministic tests. */
|
|
17
|
-
idOverride?: string;
|
|
18
|
-
}
|
|
19
|
-
export interface Sandbox {
|
|
20
|
-
/** Absolute path to the sandbox root directory. */
|
|
21
|
-
root: string;
|
|
22
|
-
/**
|
|
23
|
-
* Resolve `requested` relative to the sandbox root and return the
|
|
24
|
-
* absolute, realpath'd filesystem path. Throws
|
|
25
|
-
* `SandboxEscapeError` when the resolution crosses the boundary.
|
|
26
|
-
*
|
|
27
|
-
* `allowMissing: true` lets callers pre-resolve a destination for a
|
|
28
|
-
* write where the final component doesn't exist yet — the parent
|
|
29
|
-
* directory is realpath'd to still catch symlink escapes.
|
|
30
|
-
*/
|
|
31
|
-
resolve(requested: string, options?: {
|
|
32
|
-
allowMissing?: boolean;
|
|
33
|
-
}): Promise<string>;
|
|
34
|
-
/** Remove the sandbox directory. Idempotent. */
|
|
35
|
-
dispose(): Promise<void>;
|
|
36
|
-
}
|
|
37
|
-
/** Create and prep a fresh sandbox. Callers own cleanup via `dispose()`. */
|
|
38
|
-
export declare function createSandbox(options: SandboxOptions): Promise<Sandbox>;
|
package/dist/eval/sandbox.js
DELETED
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-case sandbox for the with-tools agent (agent/workflow mode).
|
|
3
|
-
*
|
|
4
|
-
* Every case gets its own `os.tmpdir()/cclaw-eval-<uuid>/` directory. Any
|
|
5
|
-
* `contextFiles` the case declares are copied in relative to the project
|
|
6
|
-
* root, and every tool invocation resolves paths against the sandbox
|
|
7
|
-
* root with a defensive check that refuses symlinks and `..` escapes.
|
|
8
|
-
*
|
|
9
|
-
* Design notes:
|
|
10
|
-
*
|
|
11
|
-
* - The sandbox is intentionally tiny (one directory, no symlink
|
|
12
|
-
* creation, no executable bits). We rely on `fs.realpath` on every
|
|
13
|
-
* resolved path so hostile tool output that creates a symlink to
|
|
14
|
-
* `/etc/passwd` and then tries to read it still trips the boundary
|
|
15
|
-
* check.
|
|
16
|
-
* - Cleanup is handled by `dispose()`; callers (runner, tests) must
|
|
17
|
-
* invoke it in a `try/finally` so leftover temp directories never
|
|
18
|
-
* accumulate.
|
|
19
|
-
* - The sandbox does not preserve the project's directory structure
|
|
20
|
-
* verbatim. Each entry in `contextFiles` is copied flat into
|
|
21
|
-
* `sandboxRoot/<basename>` unless it contains path separators, in
|
|
22
|
-
* which case the full relative layout is recreated. That keeps demo
|
|
23
|
-
* cases portable while still letting richer cases place files under
|
|
24
|
-
* subdirectories (e.g. `.cclaw/skills/brainstorming/SKILL.md`).
|
|
25
|
-
*/
|
|
26
|
-
import { randomUUID } from "node:crypto";
|
|
27
|
-
import fs from "node:fs/promises";
|
|
28
|
-
import os from "node:os";
|
|
29
|
-
import path from "node:path";
|
|
30
|
-
export class SandboxEscapeError extends Error {
|
|
31
|
-
requestedPath;
|
|
32
|
-
constructor(requestedPath, reason) {
|
|
33
|
-
super(`Sandbox refused path "${requestedPath}": ${reason}.`);
|
|
34
|
-
this.name = "SandboxEscapeError";
|
|
35
|
-
this.requestedPath = requestedPath;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
/** Create and prep a fresh sandbox. Callers own cleanup via `dispose()`. */
|
|
39
|
-
export async function createSandbox(options) {
|
|
40
|
-
const baseDir = options.baseDir ?? os.tmpdir();
|
|
41
|
-
const id = options.idOverride ?? randomUUID();
|
|
42
|
-
const root = path.join(baseDir, `cclaw-eval-${id}`);
|
|
43
|
-
await fs.mkdir(root, { recursive: true });
|
|
44
|
-
const realRoot = await fs.realpath(root);
|
|
45
|
-
if (options.contextFiles && options.contextFiles.length > 0) {
|
|
46
|
-
for (const rel of options.contextFiles) {
|
|
47
|
-
await copyContextFile(options.projectRoot, realRoot, rel);
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
async function resolveInside(requested, opts = {}) {
|
|
51
|
-
if (typeof requested !== "string" || requested.length === 0) {
|
|
52
|
-
throw new SandboxEscapeError(String(requested), "path must be a non-empty string");
|
|
53
|
-
}
|
|
54
|
-
if (path.isAbsolute(requested)) {
|
|
55
|
-
throw new SandboxEscapeError(requested, "absolute paths are not allowed");
|
|
56
|
-
}
|
|
57
|
-
if (requested.includes("\0")) {
|
|
58
|
-
throw new SandboxEscapeError(requested, "NUL byte in path");
|
|
59
|
-
}
|
|
60
|
-
const joined = path.resolve(realRoot, requested);
|
|
61
|
-
const relative = path.relative(realRoot, joined);
|
|
62
|
-
if (relative.startsWith("..") || path.isAbsolute(relative)) {
|
|
63
|
-
throw new SandboxEscapeError(requested, "resolves outside the sandbox");
|
|
64
|
-
}
|
|
65
|
-
let finalPath;
|
|
66
|
-
try {
|
|
67
|
-
finalPath = await fs.realpath(joined);
|
|
68
|
-
}
|
|
69
|
-
catch (err) {
|
|
70
|
-
if (!opts.allowMissing) {
|
|
71
|
-
throw new SandboxEscapeError(requested, `realpath failed: ${err.message}`);
|
|
72
|
-
}
|
|
73
|
-
const existingAncestor = await findExistingAncestor(joined, realRoot);
|
|
74
|
-
if (!existingAncestor) {
|
|
75
|
-
throw new SandboxEscapeError(requested, "no existing ancestor inside the sandbox");
|
|
76
|
-
}
|
|
77
|
-
const ancestorRel = path.relative(realRoot, existingAncestor.real);
|
|
78
|
-
if (ancestorRel.startsWith("..") || path.isAbsolute(ancestorRel)) {
|
|
79
|
-
throw new SandboxEscapeError(requested, "parent resolves outside the sandbox");
|
|
80
|
-
}
|
|
81
|
-
finalPath = path.join(existingAncestor.real, existingAncestor.trailing);
|
|
82
|
-
}
|
|
83
|
-
const finalRel = path.relative(realRoot, finalPath);
|
|
84
|
-
if (finalRel.startsWith("..") || path.isAbsolute(finalRel)) {
|
|
85
|
-
throw new SandboxEscapeError(requested, "realpath escapes the sandbox");
|
|
86
|
-
}
|
|
87
|
-
return finalPath;
|
|
88
|
-
}
|
|
89
|
-
return {
|
|
90
|
-
root: realRoot,
|
|
91
|
-
resolve: resolveInside,
|
|
92
|
-
async dispose() {
|
|
93
|
-
await fs.rm(realRoot, { recursive: true, force: true });
|
|
94
|
-
}
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
|
-
async function findExistingAncestor(target, stopAt) {
|
|
98
|
-
const segments = [];
|
|
99
|
-
let current = target;
|
|
100
|
-
while (true) {
|
|
101
|
-
try {
|
|
102
|
-
const real = await fs.realpath(current);
|
|
103
|
-
return { real, trailing: path.join(...segments.reverse()) };
|
|
104
|
-
}
|
|
105
|
-
catch {
|
|
106
|
-
const parent = path.dirname(current);
|
|
107
|
-
if (parent === current)
|
|
108
|
-
return undefined;
|
|
109
|
-
segments.push(path.basename(current));
|
|
110
|
-
if (path.relative(stopAt, parent).startsWith(".."))
|
|
111
|
-
return undefined;
|
|
112
|
-
current = parent;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
async function copyContextFile(projectRoot, sandboxRoot, relPath) {
|
|
117
|
-
if (path.isAbsolute(relPath)) {
|
|
118
|
-
throw new Error(`context_files must be project-relative: ${relPath}`);
|
|
119
|
-
}
|
|
120
|
-
const src = path.resolve(projectRoot, relPath);
|
|
121
|
-
const srcReal = await fs.realpath(src);
|
|
122
|
-
const projectReal = await fs.realpath(projectRoot);
|
|
123
|
-
const inside = path.relative(projectReal, srcReal);
|
|
124
|
-
if (inside.startsWith("..") || path.isAbsolute(inside)) {
|
|
125
|
-
throw new Error(`context_files entry resolves outside the project: ${relPath}`);
|
|
126
|
-
}
|
|
127
|
-
const stat = await fs.stat(srcReal);
|
|
128
|
-
if (stat.isDirectory()) {
|
|
129
|
-
const dest = path.join(sandboxRoot, relPath);
|
|
130
|
-
await fs.mkdir(dest, { recursive: true });
|
|
131
|
-
await fs.cp(srcReal, dest, { recursive: true });
|
|
132
|
-
return;
|
|
133
|
-
}
|
|
134
|
-
const dest = path.join(sandboxRoot, relPath);
|
|
135
|
-
await fs.mkdir(path.dirname(dest), { recursive: true });
|
|
136
|
-
await fs.copyFile(srcReal, dest);
|
|
137
|
-
}
|
package/dist/eval/tools/glob.js
DELETED
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs/promises";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { SandboxEscapeError } from "../sandbox.js";
|
|
4
|
-
import { parseArgs, requireString, truncatePayload } from "./types.js";
|
|
5
|
-
const DESCRIPTION = "List files inside the sandbox whose relative path matches a glob-style " +
|
|
6
|
-
"pattern. Supports `*` (any chars within a path segment) and `**` " +
|
|
7
|
-
"(any number of path segments). Returns matching paths, one per line.";
|
|
8
|
-
const MAX_MATCHES = 500;
|
|
9
|
-
export const globTool = {
|
|
10
|
-
descriptor: {
|
|
11
|
-
name: "glob",
|
|
12
|
-
description: DESCRIPTION,
|
|
13
|
-
parameters: {
|
|
14
|
-
type: "object",
|
|
15
|
-
additionalProperties: false,
|
|
16
|
-
required: ["pattern"],
|
|
17
|
-
properties: {
|
|
18
|
-
pattern: {
|
|
19
|
-
type: "string",
|
|
20
|
-
description: "Glob pattern, relative to the sandbox root."
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
},
|
|
25
|
-
async invoke(rawArgs, ctx) {
|
|
26
|
-
let args;
|
|
27
|
-
try {
|
|
28
|
-
args = parseArgs(rawArgs);
|
|
29
|
-
}
|
|
30
|
-
catch (err) {
|
|
31
|
-
return { ok: false, name: this.descriptor.name, error: err.message };
|
|
32
|
-
}
|
|
33
|
-
let pattern;
|
|
34
|
-
try {
|
|
35
|
-
pattern = requireString(args, "pattern");
|
|
36
|
-
}
|
|
37
|
-
catch (err) {
|
|
38
|
-
return { ok: false, name: this.descriptor.name, error: err.message };
|
|
39
|
-
}
|
|
40
|
-
if (pattern.includes("\0")) {
|
|
41
|
-
return {
|
|
42
|
-
ok: false,
|
|
43
|
-
name: this.descriptor.name,
|
|
44
|
-
error: '"pattern" must not contain NUL bytes'
|
|
45
|
-
};
|
|
46
|
-
}
|
|
47
|
-
let regex;
|
|
48
|
-
try {
|
|
49
|
-
regex = globToRegExp(pattern);
|
|
50
|
-
}
|
|
51
|
-
catch (err) {
|
|
52
|
-
return {
|
|
53
|
-
ok: false,
|
|
54
|
-
name: this.descriptor.name,
|
|
55
|
-
error: err.message
|
|
56
|
-
};
|
|
57
|
-
}
|
|
58
|
-
const matches = [];
|
|
59
|
-
try {
|
|
60
|
-
await walk(ctx.sandbox.root, "", matches, regex);
|
|
61
|
-
}
|
|
62
|
-
catch (err) {
|
|
63
|
-
if (err instanceof SandboxEscapeError) {
|
|
64
|
-
return {
|
|
65
|
-
ok: false,
|
|
66
|
-
name: this.descriptor.name,
|
|
67
|
-
error: err.message,
|
|
68
|
-
details: { deniedPath: pattern }
|
|
69
|
-
};
|
|
70
|
-
}
|
|
71
|
-
return {
|
|
72
|
-
ok: false,
|
|
73
|
-
name: this.descriptor.name,
|
|
74
|
-
error: `walk failed: ${err.message}`
|
|
75
|
-
};
|
|
76
|
-
}
|
|
77
|
-
matches.sort();
|
|
78
|
-
const capped = matches.slice(0, MAX_MATCHES);
|
|
79
|
-
const body = capped.length > 0
|
|
80
|
-
? capped.join("\n") +
|
|
81
|
-
(matches.length > capped.length
|
|
82
|
-
? `\n…[truncated at ${MAX_MATCHES} matches]`
|
|
83
|
-
: "")
|
|
84
|
-
: "(no matches)";
|
|
85
|
-
return {
|
|
86
|
-
ok: true,
|
|
87
|
-
name: this.descriptor.name,
|
|
88
|
-
content: truncatePayload(body, ctx.maxResultBytes),
|
|
89
|
-
details: {
|
|
90
|
-
pattern,
|
|
91
|
-
matches: capped.length,
|
|
92
|
-
totalMatches: matches.length,
|
|
93
|
-
truncated: matches.length > capped.length
|
|
94
|
-
}
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
|
-
};
|
|
98
|
-
async function walk(root, rel, acc, regex) {
|
|
99
|
-
const dir = path.join(root, rel);
|
|
100
|
-
let entries;
|
|
101
|
-
try {
|
|
102
|
-
entries = (await fs.readdir(dir, { withFileTypes: true }));
|
|
103
|
-
}
|
|
104
|
-
catch {
|
|
105
|
-
return;
|
|
106
|
-
}
|
|
107
|
-
for (const entry of entries) {
|
|
108
|
-
const childRel = rel ? path.join(rel, entry.name) : entry.name;
|
|
109
|
-
if (entry.isSymbolicLink())
|
|
110
|
-
continue;
|
|
111
|
-
if (entry.isDirectory()) {
|
|
112
|
-
await walk(root, childRel, acc, regex);
|
|
113
|
-
continue;
|
|
114
|
-
}
|
|
115
|
-
if (entry.isFile() && regex.test(childRel.replace(/\\/g, "/"))) {
|
|
116
|
-
acc.push(childRel);
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
/**
|
|
121
|
-
* Minimal glob → regex: `**` matches zero or more path segments, `*`
|
|
122
|
-
* matches anything except `/`, `?` matches a single non-slash char.
|
|
123
|
-
* Everything else is escaped. Intentionally narrower than full
|
|
124
|
-
* bash-style expansion so behavior is easy to reason about.
|
|
125
|
-
*/
|
|
126
|
-
function globToRegExp(pattern) {
|
|
127
|
-
const normalized = pattern.replace(/\\/g, "/");
|
|
128
|
-
let src = "^";
|
|
129
|
-
let i = 0;
|
|
130
|
-
while (i < normalized.length) {
|
|
131
|
-
const c = normalized[i];
|
|
132
|
-
if (c === "*") {
|
|
133
|
-
if (normalized[i + 1] === "*") {
|
|
134
|
-
if (normalized[i + 2] === "/") {
|
|
135
|
-
src += "(?:.*/)?";
|
|
136
|
-
i += 3;
|
|
137
|
-
}
|
|
138
|
-
else {
|
|
139
|
-
src += ".*";
|
|
140
|
-
i += 2;
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
else {
|
|
144
|
-
src += "[^/]*";
|
|
145
|
-
i += 1;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
else if (c === "?") {
|
|
149
|
-
src += "[^/]";
|
|
150
|
-
i += 1;
|
|
151
|
-
}
|
|
152
|
-
else if ("+()|^$.{}[]\\".includes(c)) {
|
|
153
|
-
src += `\\${c}`;
|
|
154
|
-
i += 1;
|
|
155
|
-
}
|
|
156
|
-
else {
|
|
157
|
-
src += c;
|
|
158
|
-
i += 1;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
src += "$";
|
|
162
|
-
return new RegExp(src);
|
|
163
|
-
}
|
package/dist/eval/tools/grep.js
DELETED
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs/promises";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { SandboxEscapeError } from "../sandbox.js";
|
|
4
|
-
import { parseArgs, requireString, optionalNumber, truncatePayload } from "./types.js";
|
|
5
|
-
const DESCRIPTION = "Search the sandbox for a regular expression. Returns matching lines in " +
|
|
6
|
-
"`path:line:text` form. Accepts optional `caseInsensitive` and a per-call " +
|
|
7
|
-
"`maxMatches` cap (default 100, hard max 500).";
|
|
8
|
-
const HARD_MAX = 500;
|
|
9
|
-
export const grepTool = {
|
|
10
|
-
descriptor: {
|
|
11
|
-
name: "grep",
|
|
12
|
-
description: DESCRIPTION,
|
|
13
|
-
parameters: {
|
|
14
|
-
type: "object",
|
|
15
|
-
additionalProperties: false,
|
|
16
|
-
required: ["pattern"],
|
|
17
|
-
properties: {
|
|
18
|
-
pattern: {
|
|
19
|
-
type: "string",
|
|
20
|
-
description: "Regular expression compiled with JavaScript semantics."
|
|
21
|
-
},
|
|
22
|
-
caseInsensitive: {
|
|
23
|
-
type: "boolean",
|
|
24
|
-
description: "Match case-insensitively (default false)."
|
|
25
|
-
},
|
|
26
|
-
maxMatches: {
|
|
27
|
-
type: "integer",
|
|
28
|
-
minimum: 1,
|
|
29
|
-
description: "Stop after N matches (default 100, hard max 500)."
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
},
|
|
34
|
-
async invoke(rawArgs, ctx) {
|
|
35
|
-
let args;
|
|
36
|
-
try {
|
|
37
|
-
args = parseArgs(rawArgs);
|
|
38
|
-
}
|
|
39
|
-
catch (err) {
|
|
40
|
-
return { ok: false, name: this.descriptor.name, error: err.message };
|
|
41
|
-
}
|
|
42
|
-
let pattern;
|
|
43
|
-
try {
|
|
44
|
-
pattern = requireString(args, "pattern");
|
|
45
|
-
}
|
|
46
|
-
catch (err) {
|
|
47
|
-
return { ok: false, name: this.descriptor.name, error: err.message };
|
|
48
|
-
}
|
|
49
|
-
const caseInsensitive = args.caseInsensitive === true;
|
|
50
|
-
let maxMatches;
|
|
51
|
-
try {
|
|
52
|
-
const raw = optionalNumber(args, "maxMatches");
|
|
53
|
-
maxMatches = raw === undefined ? 100 : Math.min(HARD_MAX, Math.max(1, Math.floor(raw)));
|
|
54
|
-
}
|
|
55
|
-
catch (err) {
|
|
56
|
-
return {
|
|
57
|
-
ok: false,
|
|
58
|
-
name: this.descriptor.name,
|
|
59
|
-
error: err.message
|
|
60
|
-
};
|
|
61
|
-
}
|
|
62
|
-
let regex;
|
|
63
|
-
try {
|
|
64
|
-
regex = new RegExp(pattern, caseInsensitive ? "i" : "");
|
|
65
|
-
}
|
|
66
|
-
catch (err) {
|
|
67
|
-
return {
|
|
68
|
-
ok: false,
|
|
69
|
-
name: this.descriptor.name,
|
|
70
|
-
error: `invalid regex: ${err.message}`
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
let filesScanned = 0;
|
|
74
|
-
const hits = [];
|
|
75
|
-
try {
|
|
76
|
-
await walk(ctx.sandbox.root, "", async (relPath, abs) => {
|
|
77
|
-
if (hits.length >= maxMatches)
|
|
78
|
-
return false;
|
|
79
|
-
let content;
|
|
80
|
-
try {
|
|
81
|
-
content = await fs.readFile(abs, "utf8");
|
|
82
|
-
}
|
|
83
|
-
catch {
|
|
84
|
-
return true;
|
|
85
|
-
}
|
|
86
|
-
filesScanned += 1;
|
|
87
|
-
const lines = content.split(/\r?\n/);
|
|
88
|
-
for (let i = 0; i < lines.length; i += 1) {
|
|
89
|
-
const line = lines[i];
|
|
90
|
-
if (regex.test(line)) {
|
|
91
|
-
hits.push(`${relPath}:${i + 1}:${line}`);
|
|
92
|
-
if (hits.length >= maxMatches)
|
|
93
|
-
return false;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
return true;
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
catch (err) {
|
|
100
|
-
if (err instanceof SandboxEscapeError) {
|
|
101
|
-
return {
|
|
102
|
-
ok: false,
|
|
103
|
-
name: this.descriptor.name,
|
|
104
|
-
error: err.message,
|
|
105
|
-
details: { deniedPath: pattern }
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
return {
|
|
109
|
-
ok: false,
|
|
110
|
-
name: this.descriptor.name,
|
|
111
|
-
error: `walk failed: ${err.message}`
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
const body = hits.length > 0 ? hits.join("\n") : "(no matches)";
|
|
115
|
-
return {
|
|
116
|
-
ok: true,
|
|
117
|
-
name: this.descriptor.name,
|
|
118
|
-
content: truncatePayload(body, ctx.maxResultBytes),
|
|
119
|
-
details: {
|
|
120
|
-
pattern,
|
|
121
|
-
caseInsensitive,
|
|
122
|
-
matches: hits.length,
|
|
123
|
-
filesScanned,
|
|
124
|
-
truncated: hits.length >= maxMatches
|
|
125
|
-
}
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
};
|
|
129
|
-
async function walk(root, rel, visit) {
|
|
130
|
-
const dir = path.join(root, rel);
|
|
131
|
-
let entries;
|
|
132
|
-
try {
|
|
133
|
-
entries = (await fs.readdir(dir, { withFileTypes: true }));
|
|
134
|
-
}
|
|
135
|
-
catch {
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
138
|
-
for (const entry of entries) {
|
|
139
|
-
const childRel = rel ? path.join(rel, entry.name) : entry.name;
|
|
140
|
-
if (entry.isSymbolicLink())
|
|
141
|
-
continue;
|
|
142
|
-
if (entry.isDirectory()) {
|
|
143
|
-
await walk(root, childRel, visit);
|
|
144
|
-
continue;
|
|
145
|
-
}
|
|
146
|
-
if (entry.isFile()) {
|
|
147
|
-
const keepGoing = await visit(childRel.replace(/\\/g, "/"), path.join(root, childRel));
|
|
148
|
-
if (keepGoing === false)
|
|
149
|
-
return;
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
}
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
import type { SandboxTool } from "./types.js";
|
|
2
|
-
export { SandboxTool, ToolResult, ToolContext, truncatePayload } from "./types.js";
|
|
3
|
-
export declare const BUILTIN_TOOLS: SandboxTool[];
|
|
4
|
-
/** Build a lookup for the agent loop. */
|
|
5
|
-
export declare function toolsByName(tools?: SandboxTool[]): Map<string, SandboxTool>;
|
|
6
|
-
/** Shape a tool list for OpenAI-style `tools[]` in the chat request. */
|
|
7
|
-
export declare function toolsForRequest(tools?: SandboxTool[]): unknown[];
|