@gonzih/cc-agent 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/docker.js ADDED
@@ -0,0 +1,165 @@
1
+ import { execFile, spawn } from "child_process";
2
+ import { EventEmitter } from "events";
3
+ import { promisify } from "util";
4
+ import { logger } from "./logger.js";
5
+ const execFileAsync = promisify(execFile);
6
/**
 * Probe whether the Docker CLI can reach a daemon.
 *
 * Runs `docker info` with a 5 second timeout. Any failure of that command
 * is reported as "not available" rather than thrown.
 *
 * @returns {Promise<boolean>} true when `docker info` succeeded, false otherwise
 */
export async function isDockerAvailable() {
    let reachable = false;
    try {
        await execFileAsync("docker", ["info"], { timeout: 5000 });
        reachable = true;
    }
    catch {
        // Any failure of the probe command means "not available".
    }
    return reachable;
}
15
/**
 * List running Docker containers whose name matches the `cc-agent-` filter.
 *
 * Asks `docker ps` for tab-separated ID / name / status / running-for columns
 * and converts each non-empty output line into a record. Missing columns
 * become empty strings.
 *
 * @returns {Promise<Array<{id: string, name: string, status: string, uptime: string}>>}
 *   one entry per listed container; an empty array when the command fails
 */
export async function listCcAgentContainers() {
    try {
        const psArgs = [
            "ps",
            "--filter", "name=cc-agent-",
            "--format", "{{.ID}}\t{{.Names}}\t{{.Status}}\t{{.RunningFor}}",
        ];
        const result = await execFileAsync("docker", psArgs);
        const containers = [];
        for (const row of result.stdout.trim().split("\n")) {
            if (!row) {
                continue;
            }
            const fields = row.split("\t");
            containers.push({
                id: fields[0] ?? "",
                name: fields[1] ?? "",
                status: fields[2] ?? "",
                uptime: fields[3] ?? "",
            });
        }
        return containers;
    }
    catch {
        // Best effort: a failed listing is reported as "no containers".
        return [];
    }
}
35
/**
 * Run a cc-agent job inside a Docker container.
 *
 * Starts a detached `node:22` container whose entry script installs git, the
 * gh CLI and claude-code, clones `opts.repoUrl` into /workspace and runs
 * Claude on `opts.task`. Container logs are streamed line by line until the
 * container exits, after which the container is force-removed.
 *
 * Emits:
 *   "text"  (line: string)  — each line of container output (stdout and stderr)
 *   "exit"  (code: number)  — container exit code; 1 when the run failed or was
 *                             killed right after start
 *   "error" (err: unknown)  — fatal error; only emitted when an "error" listener
 *                             is attached, and always followed by "exit" with 1
 *
 * @param opts - job options read by this function: `containerName`, `repoUrl`,
 *   `task`, and the optional `anthropicToken`, `githubToken`, `namespace`
 * @returns the emitter, extended with `pid` (always undefined), `stdin`
 *   (always null) and `kill()` which force-removes the container
 */
export function runDockerAgent(opts) {
    // Upper bound on how long we wait for `docker logs -f` to finish delivering
    // output once the container has exited.
    const LOG_DRAIN_TIMEOUT_MS = 2000;
    const emitter = new EventEmitter();
    emitter.pid = undefined;
    emitter.stdin = null;
    let containerStarted = false;
    let killed = false;
    // Best-effort force removal; failures are deliberately ignored.
    const removeContainer = () => execFileAsync("docker", ["rm", "-f", opts.containerName]).catch(() => { });
    emitter.kill = () => {
        killed = true;
        if (containerStarted) {
            void removeContainer();
        }
    };
    void (async () => {
        let logProc;
        try {
            // Build docker env args.
            // Secrets are handed to the docker CLI through its own environment and
            // referenced by name only (`-e NAME`), so they never appear in argv:
            // not in the host process list and not in the "Command failed: ..."
            // message that is logged when `docker run` fails.
            const envArgs = [];
            const secretEnv = {};
            const passSecret = (name, value) => {
                secretEnv[name] = value;
                envArgs.push("-e", name);
            };
            if (opts.anthropicToken) {
                passSecret("ANTHROPIC_AUTH_TOKEN", opts.anthropicToken);
                passSecret("ANTHROPIC_API_KEY", opts.anthropicToken);
            }
            if (opts.githubToken) {
                passSecret("GITHUB_TOKEN", opts.githubToken);
                passSecret("GH_TOKEN", opts.githubToken);
            }
            if (opts.namespace) {
                envArgs.push("-e", `CC_AGENT_NAMESPACE=${opts.namespace}`);
            }
            // HOME must keep its explicit value form: overriding HOME in the docker
            // CLI's own environment would change where the CLI looks for its config.
            envArgs.push("-e", "HOME=/root");
            envArgs.push("-e", "GIT_CONFIG_GLOBAL=/dev/null");
            // Pass task and repo via env to avoid shell quoting issues
            envArgs.push("-e", `CC_DOCKER_TASK=${opts.task}`);
            envArgs.push("-e", `CC_DOCKER_REPO=${opts.repoUrl}`);
            const containerScript = [
                "set -e",
                // Install system deps (node:22 is Debian-based)
                "apt-get update -qq >/dev/null 2>&1 && apt-get install -y -qq git curl >/dev/null 2>&1",
                // Install gh CLI via direct binary download (arch from dpkg, amd64 fallback)
                "GH_VERSION=2.65.0",
                "ARCH=$(dpkg --print-architecture 2>/dev/null || echo amd64)",
                "curl -fsSL \"https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_${ARCH}.tar.gz\" -o /tmp/gh.tar.gz",
                "tar -xzf /tmp/gh.tar.gz -C /tmp",
                "mv /tmp/gh_${GH_VERSION}_linux_${ARCH}/bin/gh /usr/local/bin/",
                // Install claude-code
                "npm install -g @anthropic-ai/claude-code >/dev/null 2>&1",
                // Configure git
                "git config --global user.email 'cc-agent@localhost'",
                "git config --global user.name 'cc-agent'",
                // Configure HTTPS credential helper for GitHub token
                "git config --global credential.helper '!f() { echo username=x-access-token; echo password=$GITHUB_TOKEN; }; f'",
                // Clone repo
                "git clone --depth 1 \"$CC_DOCKER_REPO\" /workspace",
                "cd /workspace",
                // Run Claude (dangerously-skip-permissions needed for non-interactive use)
                "exec claude --dangerously-skip-permissions --print --output-format stream-json -p \"$CC_DOCKER_TASK\"",
            ].join(" && ");
            // No kill check is needed here: everything above runs synchronously
            // before the emitter is returned, so kill() cannot have been called yet.
            // A kill() issued while `docker run` is in flight is handled below.
            // Start container in detached mode
            const { stdout: dockerIdRaw } = await execFileAsync("docker", [
                "run", "-d",
                "--name", opts.containerName,
                ...envArgs,
                "node:22",
                "/bin/sh", "-c", containerScript,
            ], { env: { ...process.env, ...secretEnv } });
            const dockerId = dockerIdRaw.trim();
            containerStarted = true;
            logger.info("docker:container-started", { name: opts.containerName, id: dockerId });
            if (killed) {
                // kill() arrived while the container was starting.
                void removeContainer();
                emitter.emit("exit", 1);
                return;
            }
            // Stream logs from container
            logProc = spawn("docker", ["logs", "-f", opts.containerName], {
                stdio: ["ignore", "pipe", "pipe"],
            });
            // Resolves once the log process has closed its stdio, or failed to spawn.
            // Without an "error" listener a spawn failure would crash the process.
            const logsClosed = new Promise((resolve) => {
                logProc.once("close", resolve);
                logProc.once("error", (err) => {
                    logger.error("docker:logs-error", { name: opts.containerName, error: String(err) });
                    resolve();
                });
            });
            // One line buffer per stream, so a partial stdout line is never spliced
            // together with stderr data. setEncoding keeps multi-byte UTF-8
            // characters intact across chunk boundaries.
            const flushers = [logProc.stdout, logProc.stderr].map((stream) => {
                let pending = "";
                stream?.setEncoding("utf8");
                stream?.on("data", (chunk) => {
                    pending += chunk;
                    const lines = pending.split("\n");
                    pending = lines.pop() ?? "";
                    for (const line of lines) {
                        emitter.emit("text", line);
                    }
                });
                return () => {
                    if (pending.trim()) {
                        emitter.emit("text", pending);
                    }
                    pending = "";
                };
            });
            // Wait for container to finish
            let exitCode = 0;
            try {
                const { stdout: waitOut } = await execFileAsync("docker", ["wait", opts.containerName]);
                exitCode = Number.parseInt(waitOut.trim(), 10);
                if (Number.isNaN(exitCode)) {
                    exitCode = 0;
                }
            }
            catch {
                // `docker wait` fails e.g. when the container was removed by kill().
                exitCode = 1;
            }
            // Give `docker logs -f` a bounded amount of time to deliver the tail of
            // the output; killing it right away can drop the last lines.
            let drainTimer;
            await Promise.race([
                logsClosed,
                new Promise((resolve) => {
                    drainTimer = setTimeout(resolve, LOG_DRAIN_TIMEOUT_MS);
                }),
            ]);
            clearTimeout(drainTimer);
            // Drain remaining buffered output
            for (const flush of flushers) {
                flush();
            }
            logProc.kill();
            // Cleanup container (best effort)
            containerStarted = false;
            await removeContainer();
            logger.info("docker:container-done", { name: opts.containerName, exitCode });
            emitter.emit("exit", exitCode);
        }
        catch (err) {
            logger.error("docker:error", { name: opts.containerName, error: String(err) });
            // Do not leave the log follower or the container behind on failure.
            logProc?.kill();
            if (containerStarted) {
                containerStarted = false;
                await removeContainer();
            }
            // EventEmitter throws on an unhandled "error" event, which would skip
            // the "exit" notification below and reject this promise unhandled.
            if (emitter.listenerCount("error") > 0) {
                emitter.emit("error", err);
            }
            emitter.emit("exit", 1);
        }
    })();
    return emitter;
}
165
+ //# sourceMappingURL=docker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,EAAyC,CAAC,CAAC;QAClG,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AASD,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACzC,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC/C,IAAI;YACJ,UAAU,EAAE,gBAAgB;YAC5B,UAAU,EAAE,mDAAmD;SAChE,CAAC,CAAC;QACH,OAAO,MAAM;aACV,IAAI,EAAE;aACN,KAAK,CAAC,IAAI,CAAC;aACX,MAAM,CAAC,OAAO,CAAC;aACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACpD,OAAO,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,IAAI,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;IACP,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAQD;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,IAO9B;IACC,MAAM,OAAO,GAAG,IAAI,YAAY,EAAwB,CAAC;IACzD,OAAO,CAAC,GAAG,GAAG,SAAS,CAAC;IACxB,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC;IAErB,IAAI,gBAAgB,GAAG,KAAK,CAAC;IAC7B,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,OAAO,CAAC,IAAI,GAAG,GAAG,EAAE;QAClB,MAAM,GAAG,IAAI,CAAC;QACd,IAAI,gBAAgB,EAAE,CAAC;YACrB,QAAQ,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,IAAI,CAAC;YACH,wBAAwB;YACxB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,wBAAwB,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;gBAClE,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,qBAAqB,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;YACjE,CAAC;YACD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBACrB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,gBAAgB,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;gBACvD,
OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;YACrD,CAAC;YACD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,sBAAsB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAC7D,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,6BAA6B,CAAC,CAAC;YAClD,2DAA2D;YAC3D,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAkB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAkB,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YAErD,MAAM,eAAe,GAAG;gBACtB,QAAQ;gBACR,gDAAgD;gBAChD,uFAAuF;gBACvF,oDAAoD;gBACpD,mBAAmB;gBACnB,6DAA6D;gBAC7D,oIAAoI;gBACpI,iCAAiC;gBACjC,+DAA+D;gBAC/D,sBAAsB;gBACtB,0DAA0D;gBAC1D,gBAAgB;gBAChB,qDAAqD;gBACrD,0CAA0C;gBAC1C,qDAAqD;gBACrD,gHAAgH;gBAChH,aAAa;gBACb,oDAAoD;gBACpD,eAAe;gBACf,2EAA2E;gBAC3E,uGAAuG;aACxG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEf,IAAI,MAAM;gBAAE,OAAO;YAEnB,mCAAmC;YACnC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE;gBAC5D,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,IAAI,CAAC,aAAa;gBAC5B,GAAG,OAAO;gBACV,SAAS;gBACT,SAAS,EAAE,IAAI,EAAE,eAAe;aACjC,CAAC,CAAC;YACH,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;YACpC,gBAAgB,GAAG,IAAI,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEpF,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBACxB,OAAO;YACT,CAAC;YAED,6BAA6B;YAC7B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,EAAE;gBAClE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;YAEH,IAAI,GAAG,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,CAAC,IAAY,EAAQ,EAAE;gBACpC,GAAG,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9B,GAAG,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;gBACxB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC,CAAC;YACF,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnC,OAAO,C
AAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YAEnC,+BAA+B;YAC/B,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;gBACxF,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;gBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC;oBAAE,QAAQ,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,QAAQ,GAAG,CAAC,CAAC;YACf,CAAC;YAED,kCAAkC;YAClC,IAAI,GAAG,CAAC,IAAI,EAAE;gBAAE,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,EAAE,CAAC;YAEf,oBAAoB;YACpB,gBAAgB,GAAG,KAAK,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,aAAa,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;YAAC,MAAM,CAAC;gBACP,sBAAsB;YACxB,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,uBAAuB,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC7E,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACjC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC/E,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Evaluator job template — generates task text for evaluator jobs in evolutionary plans.
3
+ */
4
/** Scoring strategy the evaluator prompt asks for: parse test output, check PR merge status, or judge manually. */
+ export type BranchEval = "test_pass_rate" | "pr_merged" | "manual";
5
/** Winner selection strategy: highest score, score-proportional random pick, or last variant to finish. */
+ export type BranchSelect = "best_score" | "score_prop" | "latest";
6
/** Inputs for building the evaluator job's task text. */
+ export interface EvaluatorOptions {
7
/** Job IDs of the variant jobs to evaluate, listed in variant order (variant 1 first). */
+ variantJobIds: string[];
8
/** Branch for the variant at the same index in variantJobIds; a falsy entry omits the branch from the listing. */
+ variantBranches: (string | undefined)[];
9
/** Which scoring instructions to put into the prompt. */
+ branchEval: BranchEval;
10
/** Which winner-selection instructions to put into the prompt. */
+ branchSelect: BranchSelect;
11
/** Plan step ID, interpolated into the prompt's opening line. */
+ stepId: string;
12
+ }
13
/** Builds the full task text for an evaluator job from the given options. */
+ export declare function buildEvaluatorTask(opts: EvaluatorOptions): string;
14
+ //# sourceMappingURL=evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,MAAM,UAAU,GAAG,gBAAgB,GAAG,WAAW,GAAG,QAAQ,CAAC;AACnE,MAAM,MAAM,YAAY,GAAG,YAAY,GAAG,YAAY,GAAG,QAAQ,CAAC;AAElE,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,eAAe,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IACxC,UAAU,EAAE,UAAU,CAAC;IACvB,YAAY,EAAE,YAAY,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CAmDjE"}
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Evaluator job template — generates task text for evaluator jobs in evolutionary plans.
3
+ */
4
/**
 * Build the task prompt for an evaluator job in an evolutionary plan.
 *
 * The prompt lists the variant jobs, explains how to score them (according to
 * `branchEval`), how to pick the winner (according to `branchSelect`), and the
 * exact `WINNER: {...}` line the evaluator must print.
 *
 * @param opts - evaluator options
 * @param opts.variantJobIds - job IDs of the variants, in variant order
 * @param opts.variantBranches - branch per variant (same index as variantJobIds);
 *   falsy entries are left out of the listing
 * @param opts.branchEval - "test_pass_rate" | "pr_merged" | "manual"
 * @param opts.branchSelect - "best_score" | "score_prop" | "latest"
 * @param opts.stepId - plan step ID shown in the prompt's first line
 * @returns the complete prompt text
 */
export function buildEvaluatorTask(opts) {
    const { variantJobIds, variantBranches, branchEval, branchSelect, stepId } = opts;
    const variantList = variantJobIds
        .map((id, i) => ` - Variant ${i + 1}: job_id=${id}${variantBranches[i] ? `, branch=${variantBranches[i]}` : ""}`)
        .join("\n");
    const evalInstructions = buildEvalInstructions(branchEval);
    const selectInstructions = buildSelectInstructions(branchSelect, variantJobIds.length);
    // Only the rule for the active strategy is listed. Rendering one bullet per
    // strategy and blanking the inactive ones left empty "- " bullets in the prompt.
    const scoringRules = ["- Check the job output using get_job_output for each variant job_id"];
    if (branchEval === "test_pass_rate") {
        scoringRules.push("- Parse test results: look for patterns like 'X passing', 'X tests passed', 'X failed'. Score = (passing / (passing + failing)) * 0.7 + (exitCode === 0 ? 0.3 : 0)");
    }
    else if (branchEval === "pr_merged") {
        scoringRules.push("- Check if a PR was merged: score = pr_merged ? 1.0 : (pr_exists ? 0.5 : 0.0)");
    }
    else if (branchEval === "manual") {
        scoringRules.push("- Review the output quality manually and assign a score from 0.0 to 1.0 based on completeness and correctness");
    }
    scoringRules.push("- If a variant failed (status=failed or non-zero exit), score it 0.0");
    return `You are an evaluator agent for an evolutionary branching plan (step: ${stepId}).

Your job is to evaluate ${variantJobIds.length} variant solutions, score them, and select the best one.

## Variants to Evaluate

${variantList}

## Evaluation Instructions

${evalInstructions}

## Scoring

For each variant, compute a score from 0.0 to 1.0:
${scoringRules.join("\n")}

After computing each score, call set_job_score with the job_id and computed score.

## Winner Selection

${selectInstructions}

## Output

After evaluating all variants and calling set_job_score for each, output a JSON block exactly like this (on its own line):

WINNER: {"job_id": "<winning_job_id>", "variant_index": <N>, "branch": "<branch_or_null>", "score": <score>, "reason": "<brief reason>"}

This line will be parsed by downstream jobs to know which variant won.

## Important Notes

- Always call set_job_score for ALL variants, even if they scored 0.0
- Be objective in your evaluation
- If all variants scored 0.0, pick the one with the least errors or pick variant 1
`;
}
53
/**
 * Produce the step-by-step evaluation instructions for a scoring strategy.
 *
 * @param branchEval - "test_pass_rate" | "pr_merged" | "manual"
 * @returns the numbered instruction text, or undefined for any other value
 */
function buildEvalInstructions(branchEval) {
    const heading = "For each variant job:";
    if (branchEval === "test_pass_rate") {
        return [
            heading,
            "1. Call get_job_output with the variant's job_id",
            '2. Search output for test result patterns: "X passing", "X tests passed", "X failed", "X failures"',
            "3. Calculate pass rate = passing_tests / (passing_tests + failing_tests)",
            "4. Check job exit code (exitCode=0 means success, non-zero means failure)",
            "5. Score = pass_rate * 0.7 + (exitCode === 0 ? 0.3 : 0)",
        ].join("\n");
    }
    if (branchEval === "pr_merged") {
        return [
            heading,
            "1. Call get_job_output with the variant's job_id to find the PR URL",
            '2. Check if the PR was merged by looking for "merged" status in the output',
            "3. Score = pr_merged ? 1.0 : (pr_created ? 0.5 : 0.0)",
        ].join("\n");
    }
    if (branchEval === "manual") {
        return [
            heading,
            "1. Call get_job_output with the variant's job_id to review the full output",
            "2. Assess the quality, completeness, and correctness of the work",
            "3. Assign a score from 0.0 to 1.0 based on your assessment",
        ].join("\n");
    }
    // Unknown strategy: no instructions (implicit undefined, as before).
}
74
/**
 * Produce the winner-selection instructions for a selection strategy.
 *
 * @param branchSelect - "best_score" | "score_prop" | "latest"
 * @param variantCount - number of variants; only used in the "score_prop"
 *   text to spell out the uniform fallback probability
 * @returns the instruction text, or undefined for any other strategy value
 */
function buildSelectInstructions(branchSelect, variantCount) {
    if (branchSelect === "best_score") {
        return "Select the variant with the highest score. If there is a tie, pick the lowest variant index.";
    }
    if (branchSelect === "score_prop") {
        const steps = [
            "Select a winner using score-proportional (roulette wheel) selection:",
            "1. Compute selection probability for each variant: p_i = score_i / sum(all_scores)",
            `2. If all scores are 0, use uniform probability (1/${variantCount} each)`,
            "3. Generate a random number between 0 and 1, then pick the variant whose cumulative probability bracket contains that number",
            "4. Higher score = more likely to be selected, but lower scorers can still win (prevents premature convergence)",
        ];
        return steps.join("\n");
    }
    if (branchSelect === "latest") {
        return "Select the variant with the most recent completion time (last to finish). If unsure, pick the highest variant index.";
    }
    // Unknown strategy: no instructions (implicit undefined, as before).
}
88
+ //# sourceMappingURL=evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAaH,MAAM,UAAU,kBAAkB,CAAC,IAAsB;IACvD,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAElF,MAAM,WAAW,GAAG,aAAa;SAC9B,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,YAAY,EAAE,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;SACjH,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;IAC3D,MAAM,kBAAkB,GAAG,uBAAuB,CAAC,YAAY,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAEvF,OAAO,wEAAwE,MAAM;;0BAE7D,aAAa,CAAC,MAAM;;;;EAI5C,WAAW;;;;EAIX,gBAAgB;;;;;;IAMd,UAAU,KAAK,gBAAgB,CAAC,CAAC,CAAC,kKAAkK,CAAC,CAAC,CAAC,EAAE;IACzM,UAAU,KAAK,WAAW,CAAC,CAAC,CAAC,6EAA6E,CAAC,CAAC,CAAC,EAAE;IAC/G,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,6GAA6G,CAAC,CAAC,CAAC,EAAE;;;;;;;EAO9I,kBAAkB;;;;;;;;;;;;;;;CAenB,CAAC;AACF,CAAC;AAED,SAAS,qBAAqB,CAAC,UAAsB;IACnD,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,gBAAgB;YACnB,OAAO;;;;;wDAK2C,CAAC;QAErD,KAAK,WAAW;YACd,OAAO;;;sDAGyC,CAAC;QAEnD,KAAK,QAAQ;YACX,OAAO;;;2DAG8C,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB,CAAC,YAA0B,EAAE,YAAoB;IAC/E,QAAQ,YAAY,EAAE,CAAC;QACrB,KAAK,YAAY;YACf,OAAO,8FAA8F,CAAC;QAExG,KAAK,YAAY;YACf,OAAO;;qDAEwC,YAAY;;+GAE8C,CAAC;QAE5G,KAAK,QAAQ;YACX,OAAO,sHAAsH,CAAC;IAClI,CAAC;AACH,CAAC"}
package/dist/index.js CHANGED
@@ -20,11 +20,13 @@ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
20
20
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
21
21
  import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
22
22
  import { JobManager } from "./agent.js";
23
+ import { buildEvaluatorTask } from "./evaluator.js";
23
24
  import { loadProfiles, upsertProfile, deleteProfile, getProfile, interpolate } from "./profiles.js";
24
25
  import { planStore, jobStore, learningsStore } from "./store.js";
25
26
  import { getNamespace } from "./namespace.js";
26
27
  import { initRedis } from "./redis.js";
27
28
  import { logger } from "./logger.js";
29
+ import { listCcAgentContainers } from "./docker.js";
28
30
  import { v4 as uuidv4 } from "uuid";
29
31
  import { execFile } from "child_process";
30
32
  import { promisify } from "util";
@@ -115,6 +117,18 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
115
117
  type: "string",
116
118
  description: "Ollama host URL (default: 'http://localhost:11434'). Only used when ollama_model is set.",
117
119
  },
120
+ docker_isolation: {
121
+ type: "boolean",
122
+ description: "Run the agent in a fresh Docker container for full filesystem and process isolation. Requires Docker (colima or Docker Desktop) to be running. Falls back to host mode if Docker is unavailable. Default: false.",
123
+ },
124
+ smoke_test: {
125
+ type: "string",
126
+ description: "Shell command to run as a cheap pre-check before the full task. If it exits non-zero or times out, the job fails immediately. Example: 'npm test -- --testPathPattern=smoke 2>&1 | tail -5'",
127
+ },
128
+ smoke_test_timeout: {
129
+ type: "number",
130
+ description: "Timeout for the smoke test in seconds (default 60). Only used when smoke_test is set.",
131
+ },
118
132
  },
119
133
  required: ["repo_url", "task"],
120
134
  },
@@ -148,7 +162,15 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
148
162
  {
149
163
  name: "list_jobs",
150
164
  description: "List all agent jobs (running, done, failed, cancelled).",
151
- inputSchema: { type: "object", properties: {} },
165
+ inputSchema: {
166
+ type: "object",
167
+ properties: {
168
+ min_score: {
169
+ type: "number",
170
+ description: "Only return jobs with score >= this value (0.0–1.0). Unscored jobs are excluded when this filter is set.",
171
+ },
172
+ },
173
+ },
152
174
  },
153
175
  {
154
176
  name: "cancel_job",
@@ -280,6 +302,20 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
280
302
  items: { type: "string" },
281
303
  description: "Step IDs (from this plan) that must complete before this step starts",
282
304
  },
305
+ branches: {
306
+ type: "number",
307
+ description: "If set, spawn this many parallel variant jobs for this step instead of 1. An evaluator job is automatically added to score and select the best variant.",
308
+ },
309
+ branch_eval: {
310
+ type: "string",
311
+ enum: ["test_pass_rate", "pr_merged", "manual"],
312
+ description: "How to score variants: test_pass_rate (parse test output), pr_merged (check PR status), manual (evaluator uses judgment). Default: test_pass_rate",
313
+ },
314
+ branch_select: {
315
+ type: "string",
316
+ enum: ["best_score", "score_prop", "latest"],
317
+ description: "How to pick the winner: best_score (highest score wins), score_prop (score-proportional random selection), latest (most recently completed). Default: best_score",
318
+ },
283
319
  },
284
320
  required: ["id", "repo_url", "task"],
285
321
  },
@@ -381,6 +417,19 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
381
417
  required: ["job_id"],
382
418
  },
383
419
  },
420
+ {
421
+ name: "set_job_score",
422
+ description: "Set a quality score (0.0–1.0) on a completed job. Used by evaluator agents in evolutionary branching plans to record how well each variant performed.",
423
+ inputSchema: {
424
+ type: "object",
425
+ properties: {
426
+ job_id: { type: "string", description: "Job ID to score" },
427
+ score: { type: "number", description: "Score from 0.0 to 1.0" },
428
+ reason: { type: "string", description: "Optional reason or explanation for the score" },
429
+ },
430
+ required: ["job_id", "score"],
431
+ },
432
+ },
384
433
  {
385
434
  name: "get_learnings",
386
435
  description: "Return accumulated learnings for a namespace. Learnings are written by agents at the end of each job and stored per-namespace. Use this to understand what prior agents have discovered.",
@@ -411,6 +460,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
411
460
  },
412
461
  },
413
462
  },
463
+ {
464
+ name: "docker_ps",
465
+ description: "List currently running cc-agent Docker containers. Shows container name, status, and uptime.",
466
+ inputSchema: { type: "object", properties: {} },
467
+ },
414
468
  {
415
469
  name: "spawn_from_profile",
416
470
  description: "Spawn an agent job from a saved profile. Supports variable interpolation and per-call overrides.",
@@ -466,6 +520,9 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
466
520
  model: a.model,
467
521
  ollamaModel: a.ollama_model,
468
522
  ollamaHost: a.ollama_host,
523
+ dockerIsolation: a.docker_isolation,
524
+ smokeTest: a.smoke_test,
525
+ smokeTestTimeout: a.smoke_test_timeout,
469
526
  requiresApproval: !isTrusted,
470
527
  });
471
528
  if (!isTrusted && owner) {
@@ -545,6 +602,8 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
545
602
  cost_usd: job.costUsd,
546
603
  usage: job.usage,
547
604
  approval_issue_url: job.approvalIssueUrl,
605
+ score: job.score ?? null,
606
+ score_source: job.scoreSource ?? null,
548
607
  }),
549
608
  },
550
609
  ],
@@ -572,7 +631,11 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
572
631
  }
573
632
  case "list_jobs": {
574
633
  logger.info("tool:list_jobs");
575
- const jobs = (await jobStore.listJobs()) ?? [];
634
+ const minScore = typeof a.min_score === "number" ? a.min_score : undefined;
635
+ let jobs = (await jobStore.listJobs()) ?? [];
636
+ if (minScore !== undefined) {
637
+ jobs = jobs.filter((j) => j.score != null && j.score >= minScore);
638
+ }
576
639
  const namespace = getNamespace();
577
640
  const learnings_count = await learningsStore.getLearningsCount(namespace);
578
641
  return {
@@ -737,16 +800,72 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
737
800
  throw new Error(`Step '${step.id}' depends_on unknown step '${sid}'`);
738
801
  return jobId;
739
802
  });
740
- const jobId = await manager.spawn({
741
- repoUrl: step.repo_url,
742
- task: step.task,
743
- createBranch: step.create_branch,
744
- dependsOn: resolvedDeps,
745
- });
746
- stepIdToJobId.set(step.id, jobId);
747
- results.push({ stepId: step.id, jobId, status: resolvedDeps?.length ? "pending" : "cloning" });
803
+ if (step.branches && step.branches > 1) {
804
+ // Evolutionary mode: spawn N variant jobs in parallel
805
+ const branchEval = step.branch_eval ?? "test_pass_rate";
806
+ const branchSelect = step.branch_select ?? "best_score";
807
+ const variantJobIds = [];
808
+ const variantBranches = [];
809
+ for (let i = 1; i <= step.branches; i++) {
810
+ const branchName = step.create_branch ? `${step.create_branch}-v${i}` : undefined;
811
+ variantBranches.push(branchName);
812
+ const jobId = await manager.spawn({
813
+ repoUrl: step.repo_url,
814
+ task: step.task,
815
+ createBranch: branchName,
816
+ dependsOn: resolvedDeps,
817
+ variantIndex: i,
818
+ });
819
+ variantJobIds.push(jobId);
820
+ }
821
+ // Update siblings on all variant jobs
822
+ for (const jobId of variantJobIds) {
823
+ manager.setJobSiblings(jobId, variantJobIds.filter((id) => id !== jobId));
824
+ }
825
+ // Build evaluator task and spawn evaluator job
826
+ const evalTask = buildEvaluatorTask({
827
+ variantJobIds,
828
+ variantBranches,
829
+ branchEval,
830
+ branchSelect,
831
+ stepId: step.id,
832
+ });
833
+ const evalJobId = await manager.spawn({
834
+ repoUrl: step.repo_url,
835
+ task: evalTask,
836
+ dependsOn: variantJobIds,
837
+ });
838
+ // The logical step ID maps to the evaluator job (so subsequent steps depend on it)
839
+ stepIdToJobId.set(step.id, evalJobId);
840
+ // Track variant jobs
841
+ for (let i = 0; i < variantJobIds.length; i++) {
842
+ results.push({
843
+ stepId: `${step.id}-v${i + 1}`,
844
+ jobId: variantJobIds[i],
845
+ status: resolvedDeps?.length ? "pending" : "cloning",
846
+ role: "variant",
847
+ });
848
+ }
849
+ // Track evaluator job
850
+ results.push({
851
+ stepId: step.id,
852
+ jobId: evalJobId,
853
+ status: "pending",
854
+ role: "evaluator",
855
+ });
856
+ }
857
+ else {
858
+ // Standard single job
859
+ const jobId = await manager.spawn({
860
+ repoUrl: step.repo_url,
861
+ task: step.task,
862
+ createBranch: step.create_branch,
863
+ dependsOn: resolvedDeps,
864
+ });
865
+ stepIdToJobId.set(step.id, jobId);
866
+ results.push({ stepId: step.id, jobId, status: resolvedDeps?.length ? "pending" : "cloning" });
867
+ }
748
868
  }
749
- // Persist the plan record
750
869
  const planId = uuidv4();
751
870
  planStore.savePlan({ id: planId, goal, steps: results, createdAt: new Date().toISOString() }).catch(() => { });
752
871
  return {
@@ -903,6 +1022,13 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
903
1022
  content: [{ type: "text", text: JSON.stringify(result) }],
904
1023
  };
905
1024
  }
1025
+ case "set_job_score": {
1026
+ logger.info("tool:set_job_score", { job_id: a.job_id, score: a.score });
1027
+ const result = manager.setJobScore(a.job_id, a.score, a.reason);
1028
+ return {
1029
+ content: [{ type: "text", text: JSON.stringify(result) }],
1030
+ };
1031
+ }
906
1032
  case "get_learnings": {
907
1033
  const ns = a.namespace ?? getNamespace();
908
1034
  const limit = typeof a.limit === "number" ? a.limit : 10;
@@ -926,6 +1052,16 @@ server.setRequestHandler(CallToolRequestSchema, async (req) => {
926
1052
  }],
927
1053
  };
928
1054
  }
1055
+ case "docker_ps": {
1056
+ logger.info("tool:docker_ps");
1057
+ const containers = await listCcAgentContainers();
1058
+ return {
1059
+ content: [{
1060
+ type: "text",
1061
+ text: JSON.stringify({ containers, total: containers.length }),
1062
+ }],
1063
+ };
1064
+ }
929
1065
  default:
930
1066
  throw new Error(`Unknown tool: ${name}`);
931
1067
  }