@gonzih/cc-agent 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +15 -0
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +193 -2
- package/dist/agent.js.map +1 -1
- package/dist/docker.d.ts +31 -0
- package/dist/docker.d.ts.map +1 -0
- package/dist/docker.js +165 -0
- package/dist/docker.js.map +1 -0
- package/dist/evaluator.d.ts +14 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +88 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/index.js +147 -11
- package/dist/index.js.map +1 -1
- package/dist/preamble.d.ts +1 -1
- package/dist/preamble.d.ts.map +1 -1
- package/dist/preamble.js +14 -0
- package/dist/preamble.js.map +1 -1
- package/dist/store.d.ts +6 -0
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +4 -0
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +20 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/docker.js
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import { execFile, spawn } from "child_process";
|
|
2
|
+
import { EventEmitter } from "events";
|
|
3
|
+
import { promisify } from "util";
|
|
4
|
+
import { logger } from "./logger.js";
|
|
5
|
+
const execFileAsync = promisify(execFile);
|
|
6
|
+
/**
 * Probe whether the Docker CLI can reach a daemon.
 *
 * Runs `docker info` with a 5 second timeout. Any failure (non-zero exit,
 * timeout, or the `docker` binary being absent) is reported as `false`
 * rather than thrown.
 *
 * @returns {Promise<boolean>} `true` when `docker info` succeeds, else `false`.
 */
export async function isDockerAvailable() {
    const probe = execFileAsync("docker", ["info"], { timeout: 5000 });
    return probe.then(() => true, () => false);
}
|
|
15
|
+
/**
 * List running containers whose name matches the `cc-agent-` filter.
 *
 * Asks `docker ps` for one tab-separated row per container (ID, name, status,
 * running-for) and converts each non-empty row into a plain object. Missing
 * columns become empty strings.
 *
 * @returns {Promise<Array<{id: string, name: string, status: string, uptime: string}>>}
 *   The parsed containers, or an empty array when the command fails.
 */
export async function listCcAgentContainers() {
    try {
        const result = await execFileAsync("docker", [
            "ps",
            "--filter", "name=cc-agent-",
            "--format", "{{.ID}}\t{{.Names}}\t{{.Status}}\t{{.RunningFor}}",
        ]);
        const containers = [];
        for (const row of result.stdout.trim().split("\n")) {
            // An empty stdout still yields one empty row after split(); skip it.
            if (!row) {
                continue;
            }
            const columns = row.split("\t");
            containers.push({
                id: columns[0] ?? "",
                name: columns[1] ?? "",
                status: columns[2] ?? "",
                uptime: columns[3] ?? "",
            });
        }
        return containers;
    }
    catch {
        // Best-effort listing: an unavailable Docker is reported as "no containers".
        return [];
    }
}
|
|
35
|
+
/**
 * Run a cc-agent job inside a Docker container.
 *
 * Starts a detached `node:22` container that installs git, curl, the gh CLI
 * and claude-code, clones `opts.repoUrl` into /workspace and runs Claude with
 * `opts.task`. Container output is streamed back line by line.
 *
 * Emits:
 *   "text"  (line: string)  — each line of container output
 *   "exit"  (code: number)  — container exit code (1 on failure / kill before logs start)
 *   "error" (err: unknown)  — fatal error; only emitted when an "error" listener
 *                             is attached, and always followed by "exit" 1
 *
 * @param {object} opts
 * @param {string} opts.containerName - Name given to the container (`--name`); also used for logs/wait/rm.
 * @param {string} opts.task - Task text, handed to the container as CC_DOCKER_TASK.
 * @param {string} opts.repoUrl - Repository to clone, handed to the container as CC_DOCKER_REPO.
 * @param {string} [opts.anthropicToken] - Exposed as ANTHROPIC_AUTH_TOKEN and ANTHROPIC_API_KEY.
 * @param {string} [opts.githubToken] - Exposed as GITHUB_TOKEN and GH_TOKEN.
 * @param {string} [opts.namespace] - Exposed as CC_AGENT_NAMESPACE.
 * @returns {EventEmitter} Emitter with `pid` (undefined), `stdin` (null) and a
 *   `kill()` method that force-removes the container once it has started.
 */
export function runDockerAgent(opts) {
    // How long to wait for `docker logs -f` to finish delivering output after
    // the container has exited, before giving up and killing the follower.
    const LOG_DRAIN_TIMEOUT_MS = 2000;
    const emitter = new EventEmitter();
    emitter.pid = undefined;
    emitter.stdin = null;
    let containerStarted = false;
    let killed = false;
    // Fire-and-forget forced removal; failures are ignored (best-effort cleanup).
    const removeContainer = () => {
        execFile("docker", ["rm", "-f", opts.containerName], () => { });
    };
    emitter.kill = () => {
        killed = true;
        if (containerStarted) {
            removeContainer();
        }
    };
    // Splits a stream of chunks into lines and emits each as "text". One
    // instance per stream, so stdout and stderr fragments never get glued together.
    const createLineEmitter = () => {
        let pending = "";
        return {
            push(chunk) {
                pending += chunk.toString();
                const lines = pending.split("\n");
                pending = lines.pop() ?? "";
                for (const line of lines) {
                    emitter.emit("text", line);
                }
            },
            flush() {
                if (pending.trim()) {
                    emitter.emit("text", pending);
                }
                pending = "";
            },
        };
    };
    // Intentionally not awaited: progress is reported through emitter events.
    void (async () => {
        try {
            // Build docker env args
            const envArgs = [];
            // Secrets are passed by NAME only (`-e NAME`); the value travels in the
            // docker CLI's own environment so it never shows up in the host's
            // process list.
            const secretEnv = {};
            const passSecret = (name, value) => {
                secretEnv[name] = value;
                envArgs.push("-e", name);
            };
            if (opts.anthropicToken) {
                passSecret("ANTHROPIC_AUTH_TOKEN", opts.anthropicToken);
                passSecret("ANTHROPIC_API_KEY", opts.anthropicToken);
            }
            if (opts.githubToken) {
                passSecret("GITHUB_TOKEN", opts.githubToken);
                passSecret("GH_TOKEN", opts.githubToken);
            }
            if (opts.namespace) {
                envArgs.push("-e", `CC_AGENT_NAMESPACE=${opts.namespace}`);
            }
            // These stay as literal NAME=value pairs: putting HOME into the docker
            // CLI's own environment would change where the CLI looks for its config.
            envArgs.push("-e", "HOME=/root");
            envArgs.push("-e", "GIT_CONFIG_GLOBAL=/dev/null");
            // Pass task and repo via env to avoid shell quoting issues
            envArgs.push("-e", `CC_DOCKER_TASK=${opts.task}`);
            envArgs.push("-e", `CC_DOCKER_REPO=${opts.repoUrl}`);
            const containerScript = [
                "set -e",
                // Install system deps (node:22 is Debian-based)
                "apt-get update -qq >/dev/null 2>&1 && apt-get install -y -qq git curl >/dev/null 2>&1",
                // Install gh CLI via direct binary download (architecture detected via dpkg, amd64 fallback)
                "GH_VERSION=2.65.0",
                "ARCH=$(dpkg --print-architecture 2>/dev/null || echo amd64)",
                "curl -fsSL \"https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_${ARCH}.tar.gz\" -o /tmp/gh.tar.gz",
                "tar -xzf /tmp/gh.tar.gz -C /tmp",
                "mv /tmp/gh_${GH_VERSION}_linux_${ARCH}/bin/gh /usr/local/bin/",
                // Install claude-code
                "npm install -g @anthropic-ai/claude-code >/dev/null 2>&1",
                // Configure git
                "git config --global user.email 'cc-agent@localhost'",
                "git config --global user.name 'cc-agent'",
                // Configure HTTPS credential helper for GitHub token
                "git config --global credential.helper '!f() { echo username=x-access-token; echo password=$GITHUB_TOKEN; }; f'",
                // Clone repo
                "git clone --depth 1 \"$CC_DOCKER_REPO\" /workspace",
                "cd /workspace",
                // Run Claude (dangerously-skip-permissions needed for non-interactive use)
                "exec claude --dangerously-skip-permissions --print --output-format stream-json -p \"$CC_DOCKER_TASK\"",
            ].join(" && ");
            // No `killed` check is needed here: nothing above awaits, so the
            // caller cannot have received the emitter (and called kill) yet.
            // Start container in detached mode
            const { stdout: dockerIdRaw } = await execFileAsync("docker", [
                "run", "-d",
                "--name", opts.containerName,
                ...envArgs,
                "node:22",
                "/bin/sh", "-c", containerScript,
            ], { env: { ...process.env, ...secretEnv } });
            const dockerId = dockerIdRaw.trim();
            containerStarted = true;
            logger.info("docker:container-started", { name: opts.containerName, id: dockerId });
            if (killed) {
                // kill() arrived while `docker run` was in flight; it could not
                // remove the container then, so do it now.
                removeContainer();
                emitter.emit("exit", 1);
                return;
            }
            // Stream logs from container
            const logProc = spawn("docker", ["logs", "-f", opts.containerName], {
                stdio: ["ignore", "pipe", "pipe"],
            });
            // Resolves when the follower has exited and its pipes are closed, or
            // when it failed to run at all (also prevents an unhandled 'error' event).
            const logsClosed = new Promise((resolve) => {
                logProc.once("close", resolve);
                logProc.once("error", resolve);
            });
            const stdoutLines = createLineEmitter();
            const stderrLines = createLineEmitter();
            // Decode in the stream so multi-byte characters split across chunks survive.
            logProc.stdout?.setEncoding("utf8");
            logProc.stderr?.setEncoding("utf8");
            logProc.stdout?.on("data", (chunk) => stdoutLines.push(chunk));
            logProc.stderr?.on("data", (chunk) => stderrLines.push(chunk));
            // Wait for container to finish
            let exitCode = 0;
            try {
                const { stdout: waitOut } = await execFileAsync("docker", ["wait", opts.containerName]);
                exitCode = Number.parseInt(waitOut.trim(), 10);
                if (Number.isNaN(exitCode)) {
                    exitCode = 0;
                }
            }
            catch {
                exitCode = 1;
            }
            // `docker logs -f` ends on its own once the container stops; give it a
            // bounded grace period so trailing output is not cut off.
            let drainTimer;
            await Promise.race([
                logsClosed,
                new Promise((resolve) => {
                    drainTimer = setTimeout(resolve, LOG_DRAIN_TIMEOUT_MS);
                }),
            ]);
            clearTimeout(drainTimer);
            // Drain remaining buffered output
            stdoutLines.flush();
            stderrLines.flush();
            logProc.kill();
            // Cleanup container
            containerStarted = false;
            try {
                await execFileAsync("docker", ["rm", "-f", opts.containerName]);
            }
            catch {
                // Best-effort cleanup
            }
            logger.info("docker:container-done", { name: opts.containerName, exitCode });
            emitter.emit("exit", exitCode);
        }
        catch (err) {
            logger.error("docker:error", { name: opts.containerName, error: String(err) });
            if (containerStarted) {
                // Failure after start: do not leave the container running.
                containerStarted = false;
                removeContainer();
            }
            // EventEmitter throws when "error" has no listener; guard so that
            // "exit" is always delivered and this promise never rejects unhandled.
            if (emitter.listenerCount("error") > 0) {
                emitter.emit("error", err);
            }
            emitter.emit("exit", 1);
        }
    })();
    return emitter;
}
|
|
165
|
+
//# sourceMappingURL=docker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,EAAyC,CAAC,CAAC;QAClG,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AASD,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACzC,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC/C,IAAI;YACJ,UAAU,EAAE,gBAAgB;YAC5B,UAAU,EAAE,mDAAmD;SAChE,CAAC,CAAC;QACH,OAAO,MAAM;aACV,IAAI,EAAE;aACN,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,OAAO,CAAC;aACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACpD,OAAO,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,IAAI,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;IACP,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAQD;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,IAO9B;IACC,MAAM,OAAO,GAAG,IAAI,YAAY,EAAwB,CAAC;IACzD,OAAO,CAAC,GAAG,GAAG,SAAS,CAAC;IACxB,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC;IAErB,IAAI,gBAAgB,GAAG,KAAK,CAAC;IAC7B,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,OAAO,CAAC,IAAI,GAAG,GAAG,EAAE;QAClB,MAAM,GAAG,IAAI,CAAC;QACd,IAAI,gBAAgB,EAAE,CAAC;YACrB,QAAQ,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,IAAI,CAAC;YACH,wBAAwB;YACxB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,wBAAwB,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;gBAClE,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,qBAAqB,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;YACjE,CAAC;YACD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBACrB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,gBAAgB,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;gBACvD,OA
AO,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;YACrD,CAAC;YACD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,sBAAsB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAC7D,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,6BAA6B,CAAC,CAAC;YAClD,2DAA2D;YAC3D,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAkB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAkB,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YAErD,MAAM,eAAe,GAAG;gBACtB,QAAQ;gBACR,gDAAgD;gBAChD,uFAAuF;gBACvF,oDAAoD;gBACpD,mBAAmB;gBACnB,6DAA6D;gBAC7D,oIAAoI;gBACpI,iCAAiC;gBACjC,+DAA+D;gBAC/D,sBAAsB;gBACtB,0DAA0D;gBAC1D,gBAAgB;gBAChB,qDAAqD;gBACrD,0CAA0C;gBAC1C,qDAAqD;gBACrD,gHAAgH;gBAChH,aAAa;gBACb,oDAAoD;gBACpD,eAAe;gBACf,2EAA2E;gBAC3E,uGAAuG;aACxG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEf,IAAI,MAAM;gBAAE,OAAO;YAEnB,mCAAmC;YACnC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE;gBAC5D,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,IAAI,CAAC,aAAa;gBAC5B,GAAG,OAAO;gBACV,SAAS;gBACT,SAAS,EAAE,IAAI,EAAE,eAAe;aACjC,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;YACpC,gBAAgB,GAAG,IAAI,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEpF,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBACxB,OAAO;YACT,CAAC;YAED,6BAA6B;YAC7B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE;gBAClE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;YAEH,IAAI,GAAG,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,CAAC,IAAY,EAAQ,EAAE;gBACpC,GAAG,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9B,GAAG,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;gBACxB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC,CAAC;YACF,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnC,OAAO,CAA
C,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YAEnC,+BAA+B;YAC/B,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;gBACxF,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;gBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC;oBAAE,QAAQ,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,QAAQ,GAAG,CAAC,CAAC;YACf,CAAC;YAED,kCAAkC;YAClC,IAAI,GAAG,CAAC,IAAI,EAAE;gBAAE,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,EAAE,CAAC;YAEf,oBAAoB;YACpB,gBAAgB,GAAG,KAAK,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;YAAC,MAAM,CAAC;gBACP,sBAAsB;YACxB,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,uBAAuB,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC7E,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACjC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC/E,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator job template — generates task text for evaluator jobs in evolutionary plans.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * How variants are scored: `test_pass_rate` parses test output, `pr_merged`
 * checks PR status, `manual` leaves the score to the evaluator's judgment.
 */
export type BranchEval = "test_pass_rate" | "pr_merged" | "manual";
/**
 * How the winner is picked: `best_score` (highest score), `score_prop`
 * (score-proportional random selection), `latest` (most recently completed).
 */
export type BranchSelect = "best_score" | "score_prop" | "latest";
/** Inputs used to render the evaluator job's task text. */
export interface EvaluatorOptions {
    /** Job IDs of the variant jobs to evaluate; listed as "Variant 1..N" in order. */
    variantJobIds: string[];
    /** Branch per variant, read by the same index as `variantJobIds`; `undefined` when the variant has no branch. */
    variantBranches: (string | undefined)[];
    /** Scoring mode used for the evaluation instructions. */
    branchEval: BranchEval;
    /** Winner-selection mode used for the selection instructions. */
    branchSelect: BranchSelect;
    /** Plan step ID, mentioned in the task's opening line. */
    stepId: string;
}
/** Build the full task prompt for an evaluator job from the given options. */
export declare function buildEvaluatorTask(opts: EvaluatorOptions): string;
|
|
14
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,MAAM,UAAU,GAAG,gBAAgB,GAAG,WAAW,GAAG,QAAQ,CAAC;AACnE,MAAM,MAAM,YAAY,GAAG,YAAY,GAAG,YAAY,GAAG,QAAQ,CAAC;AAElE,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,eAAe,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IACxC,UAAU,EAAE,UAAU,CAAC;IACvB,YAAY,EAAE,YAAY,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CAmDjE"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator job template — generates task text for evaluator jobs in evolutionary plans.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Build the task prompt for an evaluator job in an evolutionary plan.
 *
 * The prompt lists the variant jobs, explains how to score them for the
 * chosen `branchEval` mode, how to pick a winner for the chosen
 * `branchSelect` mode, and which `WINNER: {...}` line to print at the end.
 *
 * @param {object} opts
 * @param {string[]} opts.variantJobIds - Variant job IDs, in variant order.
 * @param {(string|undefined)[]} opts.variantBranches - Branch per variant, read by the same index.
 * @param {string} opts.branchEval - "test_pass_rate", "pr_merged" or "manual".
 * @param {string} opts.branchSelect - "best_score", "score_prop" or "latest".
 * @param {string} opts.stepId - Plan step ID shown in the prompt's first line.
 * @returns {string} The complete evaluator task text.
 */
export function buildEvaluatorTask(opts) {
    const { variantJobIds, variantBranches, branchEval, branchSelect, stepId } = opts;
    const variantList = variantJobIds
        .map((id, i) => {
            const branch = variantBranches?.[i];
            return ` - Variant ${i + 1}: job_id=${id}${branch ? `, branch=${branch}` : ""}`;
        })
        .join("\n");
    const evalInstructions = buildEvalInstructions(branchEval);
    const selectInstructions = buildSelectInstructions(branchSelect, variantJobIds.length);
    // Exactly one mode-specific rule applies. Rendering only that one avoids
    // the stray empty "- " bullets the other two modes would otherwise leave.
    const modeScoringRules = new Map([
        ["test_pass_rate", "Parse test results: look for patterns like 'X passing', 'X tests passed', 'X failed'. Score = (passing / (passing + failing)) * 0.7 + (exitCode === 0 ? 0.3 : 0)"],
        ["pr_merged", "Check if a PR was merged: score = pr_merged ? 1.0 : (pr_exists ? 0.5 : 0.0)"],
        ["manual", "Review the output quality manually and assign a score from 0.0 to 1.0 based on completeness and correctness"],
    ]);
    const modeRule = modeScoringRules.get(branchEval);
    const scoringBullets = [
        "- Check the job output using get_job_output for each variant job_id",
        ...(modeRule ? [`- ${modeRule}`] : []),
        "- If a variant failed (status=failed or non-zero exit), score it 0.0",
    ].join("\n");
    return `You are an evaluator agent for an evolutionary branching plan (step: ${stepId}).

Your job is to evaluate ${variantJobIds.length} variant solutions, score them, and select the best one.

## Variants to Evaluate

${variantList}

## Evaluation Instructions

${evalInstructions}

## Scoring

For each variant, compute a score from 0.0 to 1.0:
${scoringBullets}

After computing each score, call set_job_score with the job_id and computed score.

## Winner Selection

${selectInstructions}

## Output

After evaluating all variants and calling set_job_score for each, output a JSON block exactly like this (on its own line):

WINNER: {"job_id": "<winning_job_id>", "variant_index": <N>, "branch": "<branch_or_null>", "score": <score>, "reason": "<brief reason>"}

This line will be parsed by downstream jobs to know which variant won.

## Important Notes

- Always call set_job_score for ALL variants, even if they scored 0.0
- Be objective in your evaluation
- If all variants scored 0.0, pick the one with the least errors or pick variant 1
`;
}
|
|
53
|
+
/**
 * Return the step-by-step evaluation instructions for a scoring mode.
 *
 * @param {string} branchEval - "test_pass_rate", "pr_merged" or "manual".
 *   Any other value (including undefined) is treated as "test_pass_rate",
 *   the documented default, so the prompt never contains the text "undefined".
 * @returns {string} Multi-line instruction text for the evaluator prompt.
 */
function buildEvalInstructions(branchEval) {
    switch (branchEval) {
        case "pr_merged":
            return `For each variant job:
1. Call get_job_output with the variant's job_id to find the PR URL
2. Check if the PR was merged by looking for "merged" status in the output
3. Score = pr_merged ? 1.0 : (pr_created ? 0.5 : 0.0)`;
        case "manual":
            return `For each variant job:
1. Call get_job_output with the variant's job_id to review the full output
2. Assess the quality, completeness, and correctness of the work
3. Assign a score from 0.0 to 1.0 based on your assessment`;
        case "test_pass_rate":
        default:
            // Unknown modes fall back to the default rather than returning undefined.
            return `For each variant job:
1. Call get_job_output with the variant's job_id
2. Search output for test result patterns: "X passing", "X tests passed", "X failed", "X failures"
3. Calculate pass rate = passing_tests / (passing_tests + failing_tests)
4. Check job exit code (exitCode=0 means success, non-zero means failure)
5. Score = pass_rate * 0.7 + (exitCode === 0 ? 0.3 : 0)`;
    }
}
|
|
74
|
+
/**
 * Return the winner-selection instructions for a selection mode.
 *
 * @param {string} branchSelect - "best_score", "score_prop" or "latest".
 *   Any other value (including undefined) is treated as "best_score", the
 *   documented default, so the prompt never contains the text "undefined".
 * @param {number} variantCount - Number of variants; used in the uniform
 *   probability hint of the "score_prop" text.
 * @returns {string} Instruction text for the evaluator prompt.
 */
function buildSelectInstructions(branchSelect, variantCount) {
    switch (branchSelect) {
        case "score_prop":
            return `Select a winner using score-proportional (roulette wheel) selection:
1. Compute selection probability for each variant: p_i = score_i / sum(all_scores)
2. If all scores are 0, use uniform probability (1/${variantCount} each)
3. Generate a random number between 0 and 1, then pick the variant whose cumulative probability bracket contains that number
4. Higher score = more likely to be selected, but lower scorers can still win (prevents premature convergence)`;
        case "latest":
            return `Select the variant with the most recent completion time (last to finish). If unsure, pick the highest variant index.`;
        case "best_score":
        default:
            // Unknown modes fall back to the default rather than returning undefined.
            return `Select the variant with the highest score. If there is a tie, pick the lowest variant index.`;
    }
}
|
|
88
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAaH,MAAM,UAAU,kBAAkB,CAAC,IAAsB;IACvD,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAElF,MAAM,WAAW,GAAG,aAAa;SAC9B,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,YAAY,EAAE,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;SACjH,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;IAC3D,MAAM,kBAAkB,GAAG,uBAAuB,CAAC,YAAY,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAEvF,OAAO,wEAAwE,MAAM;;0BAE7D,aAAa,CAAC,MAAM;;;;EAI5C,WAAW;;;;EAIX,gBAAgB;;;;;;IAMd,UAAU,KAAK,gBAAgB,CAAC,CAAC,CAAC,kKAAkK,CAAC,CAAC,CAAC,EAAE;IACzM,UAAU,KAAK,WAAW,CAAC,CAAC,CAAC,6EAA6E,CAAC,CAAC,CAAC,EAAE;IAC/G,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,6GAA6G,CAAC,CAAC,CAAC,EAAE;;;;;;;EAO9I,kBAAkB;;;;;;;;;;;;;;;CAenB,CAAC;AACF,CAAC;AAED,SAAS,qBAAqB,CAAC,UAAsB;IACnD,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,gBAAgB;YACnB,OAAO;;;;;wDAK2C,CAAC;QAErD,KAAK,WAAW;YACd,OAAO;;;sDAGyC,CAAC;QAEnD,KAAK,QAAQ;YACX,OAAO;;;2DAG8C,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB,CAAC,YAA0B,EAAE,YAAoB;IAC/E,QAAQ,YAAY,EAAE,CAAC;QACrB,KAAK,YAAY;YACf,OAAO,8FAA8F,CAAC;QAExG,KAAK,YAAY;YACf,OAAO;;qDAEwC,YAAY;;+GAE8C,CAAC;QAE5G,KAAK,QAAQ;YACX,OAAO,sHAAsH,CAAC;IAClI,CAAC;AACH,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -20,11 +20,13 @@ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
|
20
20
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
21
21
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
22
22
|
import { JobManager } from "./agent.js";
|
|
23
|
+
import { buildEvaluatorTask } from "./evaluator.js";
|
|
23
24
|
import { loadProfiles, upsertProfile, deleteProfile, getProfile, interpolate } from "./profiles.js";
|
|
24
25
|
import { planStore, jobStore, learningsStore } from "./store.js";
|
|
25
26
|
import { getNamespace } from "./namespace.js";
|
|
26
27
|
import { initRedis } from "./redis.js";
|
|
27
28
|
import { logger } from "./logger.js";
|
|
29
|
+
import { listCcAgentContainers } from "./docker.js";
|
|
28
30
|
import { v4 as uuidv4 } from "uuid";
|
|
29
31
|
import { execFile } from "child_process";
|
|
30
32
|
import { promisify } from "util";
|
|
@@ -115,6 +117,18 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
115
117
|
type: "string",
|
|
116
118
|
description: "Ollama host URL (default: 'http://localhost:11434'). Only used when ollama_model is set.",
|
|
117
119
|
},
|
|
120
|
+
docker_isolation: {
|
|
121
|
+
type: "boolean",
|
|
122
|
+
description: "Run the agent in a fresh Docker container for full filesystem and process isolation. Requires Docker (colima or Docker Desktop) to be running. Falls back to host mode if Docker is unavailable. Default: false.",
|
|
123
|
+
},
|
|
124
|
+
smoke_test: {
|
|
125
|
+
type: "string",
|
|
126
|
+
description: "Shell command to run as a cheap pre-check before the full task. If it exits non-zero or times out, the job fails immediately. Example: 'npm test -- --testPathPattern=smoke 2>&1 | tail -5'",
|
|
127
|
+
},
|
|
128
|
+
smoke_test_timeout: {
|
|
129
|
+
type: "number",
|
|
130
|
+
description: "Timeout for the smoke test in seconds (default 60). Only used when smoke_test is set.",
|
|
131
|
+
},
|
|
118
132
|
},
|
|
119
133
|
required: ["repo_url", "task"],
|
|
120
134
|
},
|
|
@@ -148,7 +162,15 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
148
162
|
{
|
|
149
163
|
name: "list_jobs",
|
|
150
164
|
description: "List all agent jobs (running, done, failed, cancelled).",
|
|
151
|
-
inputSchema: {
|
|
165
|
+
inputSchema: {
|
|
166
|
+
type: "object",
|
|
167
|
+
properties: {
|
|
168
|
+
min_score: {
|
|
169
|
+
type: "number",
|
|
170
|
+
description: "Only return jobs with score >= this value (0.0–1.0). Unscored jobs are excluded when this filter is set.",
|
|
171
|
+
},
|
|
172
|
+
},
|
|
173
|
+
},
|
|
152
174
|
},
|
|
153
175
|
{
|
|
154
176
|
name: "cancel_job",
|
|
@@ -280,6 +302,20 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
280
302
|
items: { type: "string" },
|
|
281
303
|
description: "Step IDs (from this plan) that must complete before this step starts",
|
|
282
304
|
},
|
|
305
|
+
branches: {
|
|
306
|
+
type: "number",
|
|
307
|
+
description: "If set, spawn this many parallel variant jobs for this step instead of 1. An evaluator job is automatically added to score and select the best variant.",
|
|
308
|
+
},
|
|
309
|
+
branch_eval: {
|
|
310
|
+
type: "string",
|
|
311
|
+
enum: ["test_pass_rate", "pr_merged", "manual"],
|
|
312
|
+
description: "How to score variants: test_pass_rate (parse test output), pr_merged (check PR status), manual (evaluator uses judgment). Default: test_pass_rate",
|
|
313
|
+
},
|
|
314
|
+
branch_select: {
|
|
315
|
+
type: "string",
|
|
316
|
+
enum: ["best_score", "score_prop", "latest"],
|
|
317
|
+
description: "How to pick the winner: best_score (highest score wins), score_prop (score-proportional random selection), latest (most recently completed). Default: best_score",
|
|
318
|
+
},
|
|
283
319
|
},
|
|
284
320
|
required: ["id", "repo_url", "task"],
|
|
285
321
|
},
|
|
@@ -381,6 +417,19 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
381
417
|
required: ["job_id"],
|
|
382
418
|
},
|
|
383
419
|
},
|
|
420
|
+
{
|
|
421
|
+
name: "set_job_score",
|
|
422
|
+
description: "Set a quality score (0.0–1.0) on a completed job. Used by evaluator agents in evolutionary branching plans to record how well each variant performed.",
|
|
423
|
+
inputSchema: {
|
|
424
|
+
type: "object",
|
|
425
|
+
properties: {
|
|
426
|
+
job_id: { type: "string", description: "Job ID to score" },
|
|
427
|
+
score: { type: "number", description: "Score from 0.0 to 1.0" },
|
|
428
|
+
reason: { type: "string", description: "Optional reason or explanation for the score" },
|
|
429
|
+
},
|
|
430
|
+
required: ["job_id", "score"],
|
|
431
|
+
},
|
|
432
|
+
},
|
|
384
433
|
{
|
|
385
434
|
name: "get_learnings",
|
|
386
435
|
description: "Return accumulated learnings for a namespace. Learnings are written by agents at the end of each job and stored per-namespace. Use this to understand what prior agents have discovered.",
|
|
@@ -411,6 +460,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
411
460
|
},
|
|
412
461
|
},
|
|
413
462
|
},
|
|
463
|
+
{
|
|
464
|
+
name: "docker_ps",
|
|
465
|
+
description: "List currently running cc-agent Docker containers. Shows container name, status, and uptime.",
|
|
466
|
+
inputSchema: { type: "object", properties: {} },
|
|
467
|
+
},
|
|
414
468
|
{
|
|
415
469
|
name: "spawn_from_profile",
|
|
416
470
|
description: "Spawn an agent job from a saved profile. Supports variable interpolation and per-call overrides.",
|
|
@@ -466,6 +520,9 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
466
520
|
model: a.model,
|
|
467
521
|
ollamaModel: a.ollama_model,
|
|
468
522
|
ollamaHost: a.ollama_host,
|
|
523
|
+
dockerIsolation: a.docker_isolation,
|
|
524
|
+
smokeTest: a.smoke_test,
|
|
525
|
+
smokeTestTimeout: a.smoke_test_timeout,
|
|
469
526
|
requiresApproval: !isTrusted,
|
|
470
527
|
});
|
|
471
528
|
if (!isTrusted && owner) {
|
|
@@ -545,6 +602,8 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
545
602
|
cost_usd: job.costUsd,
|
|
546
603
|
usage: job.usage,
|
|
547
604
|
approval_issue_url: job.approvalIssueUrl,
|
|
605
|
+
score: job.score ?? null,
|
|
606
|
+
score_source: job.scoreSource ?? null,
|
|
548
607
|
}),
|
|
549
608
|
},
|
|
550
609
|
],
|
|
@@ -572,7 +631,11 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
572
631
|
}
|
|
573
632
|
case "list_jobs": {
|
|
574
633
|
logger.info("tool:list_jobs");
|
|
575
|
-
const
|
|
634
|
+
const minScore = typeof a.min_score === "number" ? a.min_score : undefined;
|
|
635
|
+
let jobs = (await jobStore.listJobs()) ?? [];
|
|
636
|
+
if (minScore !== undefined) {
|
|
637
|
+
jobs = jobs.filter((j) => j.score != null && j.score >= minScore);
|
|
638
|
+
}
|
|
576
639
|
const namespace = getNamespace();
|
|
577
640
|
const learnings_count = await learningsStore.getLearningsCount(namespace);
|
|
578
641
|
return {
|
|
@@ -737,16 +800,72 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
737
800
|
throw new Error(`Step '${step.id}' depends_on unknown step '${sid}'`);
|
|
738
801
|
return jobId;
|
|
739
802
|
});
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
803
|
+
if (step.branches && step.branches > 1) {
|
|
804
|
+
// Evolutionary mode: spawn N variant jobs in parallel
|
|
805
|
+
const branchEval = step.branch_eval ?? "test_pass_rate";
|
|
806
|
+
const branchSelect = step.branch_select ?? "best_score";
|
|
807
|
+
const variantJobIds = [];
|
|
808
|
+
const variantBranches = [];
|
|
809
|
+
for (let i = 1; i <= step.branches; i++) {
|
|
810
|
+
const branchName = step.create_branch ? `${step.create_branch}-v${i}` : undefined;
|
|
811
|
+
variantBranches.push(branchName);
|
|
812
|
+
const jobId = await manager.spawn({
|
|
813
|
+
repoUrl: step.repo_url,
|
|
814
|
+
task: step.task,
|
|
815
|
+
createBranch: branchName,
|
|
816
|
+
dependsOn: resolvedDeps,
|
|
817
|
+
variantIndex: i,
|
|
818
|
+
});
|
|
819
|
+
variantJobIds.push(jobId);
|
|
820
|
+
}
|
|
821
|
+
// Update siblings on all variant jobs
|
|
822
|
+
for (const jobId of variantJobIds) {
|
|
823
|
+
manager.setJobSiblings(jobId, variantJobIds.filter((id) => id !== jobId));
|
|
824
|
+
}
|
|
825
|
+
// Build evaluator task and spawn evaluator job
|
|
826
|
+
const evalTask = buildEvaluatorTask({
|
|
827
|
+
variantJobIds,
|
|
828
|
+
variantBranches,
|
|
829
|
+
branchEval,
|
|
830
|
+
branchSelect,
|
|
831
|
+
stepId: step.id,
|
|
832
|
+
});
|
|
833
|
+
const evalJobId = await manager.spawn({
|
|
834
|
+
repoUrl: step.repo_url,
|
|
835
|
+
task: evalTask,
|
|
836
|
+
dependsOn: variantJobIds,
|
|
837
|
+
});
|
|
838
|
+
// The logical step ID maps to the evaluator job (so subsequent steps depend on it)
|
|
839
|
+
stepIdToJobId.set(step.id, evalJobId);
|
|
840
|
+
// Track variant jobs
|
|
841
|
+
for (let i = 0; i < variantJobIds.length; i++) {
|
|
842
|
+
results.push({
|
|
843
|
+
stepId: `${step.id}-v${i + 1}`,
|
|
844
|
+
jobId: variantJobIds[i],
|
|
845
|
+
status: resolvedDeps?.length ? "pending" : "cloning",
|
|
846
|
+
role: "variant",
|
|
847
|
+
});
|
|
848
|
+
}
|
|
849
|
+
// Track evaluator job
|
|
850
|
+
results.push({
|
|
851
|
+
stepId: step.id,
|
|
852
|
+
jobId: evalJobId,
|
|
853
|
+
status: "pending",
|
|
854
|
+
role: "evaluator",
|
|
855
|
+
});
|
|
856
|
+
}
|
|
857
|
+
else {
|
|
858
|
+
// Standard single job
|
|
859
|
+
const jobId = await manager.spawn({
|
|
860
|
+
repoUrl: step.repo_url,
|
|
861
|
+
task: step.task,
|
|
862
|
+
createBranch: step.create_branch,
|
|
863
|
+
dependsOn: resolvedDeps,
|
|
864
|
+
});
|
|
865
|
+
stepIdToJobId.set(step.id, jobId);
|
|
866
|
+
results.push({ stepId: step.id, jobId, status: resolvedDeps?.length ? "pending" : "cloning" });
|
|
867
|
+
}
|
|
748
868
|
}
|
|
749
|
-
// Persist the plan record
|
|
750
869
|
const planId = uuidv4();
|
|
751
870
|
planStore.savePlan({ id: planId, goal, steps: results, createdAt: new Date().toISOString() }).catch(() => { });
|
|
752
871
|
return {
|
|
@@ -903,6 +1022,13 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
903
1022
|
content: [{ type: "text", text: JSON.stringify(result) }],
|
|
904
1023
|
};
|
|
905
1024
|
}
|
|
1025
|
+
case "set_job_score": {
|
|
1026
|
+
logger.info("tool:set_job_score", { job_id: a.job_id, score: a.score });
|
|
1027
|
+
const result = manager.setJobScore(a.job_id, a.score, a.reason);
|
|
1028
|
+
return {
|
|
1029
|
+
content: [{ type: "text", text: JSON.stringify(result) }],
|
|
1030
|
+
};
|
|
1031
|
+
}
|
|
906
1032
|
case "get_learnings": {
|
|
907
1033
|
const ns = a.namespace ?? getNamespace();
|
|
908
1034
|
const limit = typeof a.limit === "number" ? a.limit : 10;
|
|
@@ -926,6 +1052,16 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
926
1052
|
}],
|
|
927
1053
|
};
|
|
928
1054
|
}
|
|
1055
|
+
case "docker_ps": {
|
|
1056
|
+
logger.info("tool:docker_ps");
|
|
1057
|
+
const containers = await listCcAgentContainers();
|
|
1058
|
+
return {
|
|
1059
|
+
content: [{
|
|
1060
|
+
type: "text",
|
|
1061
|
+
text: JSON.stringify({ containers, total: containers.length }),
|
|
1062
|
+
}],
|
|
1063
|
+
};
|
|
1064
|
+
}
|
|
929
1065
|
default:
|
|
930
1066
|
throw new Error(`Unknown tool: ${name}`);
|
|
931
1067
|
}
|