ralphctl 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/{add-TGJTRHIF.mjs → add-3T225IX5.mjs} +3 -3
- package/dist/{add-SEDQ3VK7.mjs → add-6A5432U2.mjs} +4 -4
- package/dist/{chunk-XPDI4SYI.mjs → chunk-742XQ7FL.mjs} +3 -3
- package/dist/{chunk-XQHEKKDN.mjs → chunk-DUU5346E.mjs} +1 -1
- package/dist/{chunk-LG6B7QVO.mjs → chunk-EUNAUHC3.mjs} +1 -1
- package/dist/{chunk-ZDEVRTGY.mjs → chunk-IB6OCKZW.mjs} +24 -2
- package/dist/{chunk-KPTPKLXY.mjs → chunk-JRFOUFD3.mjs} +1 -1
- package/dist/{chunk-XXIHDQOH.mjs → chunk-U62BX47C.mjs} +508 -173
- package/dist/{chunk-Q3VWJARJ.mjs → chunk-UBPZHHCD.mjs} +2 -2
- package/dist/cli.mjs +105 -16
- package/dist/{create-DJHCP7LN.mjs → create-MYGOWO2F.mjs} +3 -3
- package/dist/{handle-CCTBNAJZ.mjs → handle-TA4MYNQJ.mjs} +1 -1
- package/dist/{project-ZYGNPVGL.mjs → project-YONEJICR.mjs} +2 -2
- package/dist/prompts/ideate-auto.md +9 -5
- package/dist/prompts/ideate.md +28 -12
- package/dist/prompts/plan-auto.md +26 -16
- package/dist/prompts/plan-common.md +67 -22
- package/dist/prompts/plan-interactive.md +26 -27
- package/dist/prompts/task-evaluation-resume.md +22 -0
- package/dist/prompts/task-evaluation.md +146 -24
- package/dist/prompts/task-execution.md +58 -36
- package/dist/prompts/ticket-refine.md +24 -20
- package/dist/{resolver-L52KR4GY.mjs → resolver-RXEY6EJE.mjs} +2 -2
- package/dist/{sprint-LUXAV3Q3.mjs → sprint-FGLWYWKX.mjs} +2 -2
- package/dist/{wizard-D7N5WZ5H.mjs → wizard-HWOH2HPV.mjs} +6 -6
- package/package.json +6 -6
- package/schemas/task-import.schema.json +7 -0
- package/schemas/tasks.schema.json +18 -1
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
getPendingRequirements,
|
|
12
12
|
groupTicketsByProject,
|
|
13
13
|
listTickets
|
|
14
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-742XQ7FL.mjs";
|
|
15
15
|
import {
|
|
16
16
|
EXIT_ALL_BLOCKED,
|
|
17
17
|
EXIT_ERROR,
|
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
import {
|
|
24
24
|
getProject,
|
|
25
25
|
listProjects
|
|
26
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
27
27
|
import {
|
|
28
28
|
activateSprint,
|
|
29
29
|
assertSprintStatus,
|
|
@@ -40,7 +40,7 @@ import {
|
|
|
40
40
|
setAiProvider,
|
|
41
41
|
summarizeProgressForContext,
|
|
42
42
|
withFileLock
|
|
43
|
-
} from "./chunk-
|
|
43
|
+
} from "./chunk-JRFOUFD3.mjs";
|
|
44
44
|
import {
|
|
45
45
|
ensureError,
|
|
46
46
|
unwrapOrThrow,
|
|
@@ -50,9 +50,11 @@ import {
|
|
|
50
50
|
ImportTasksSchema,
|
|
51
51
|
RefinedRequirementsSchema,
|
|
52
52
|
TasksSchema,
|
|
53
|
+
appendToFile,
|
|
53
54
|
assertSafeCwd,
|
|
54
55
|
ensureDir,
|
|
55
56
|
fileExists,
|
|
57
|
+
getEvaluationFilePath,
|
|
56
58
|
getPlanningDir,
|
|
57
59
|
getProgressFilePath,
|
|
58
60
|
getRefinementDir,
|
|
@@ -61,7 +63,7 @@ import {
|
|
|
61
63
|
getTasksFilePath,
|
|
62
64
|
readValidatedJson,
|
|
63
65
|
writeValidatedJson
|
|
64
|
-
} from "./chunk-
|
|
66
|
+
} from "./chunk-IB6OCKZW.mjs";
|
|
65
67
|
import {
|
|
66
68
|
DependencyCycleError,
|
|
67
69
|
IOError,
|
|
@@ -162,10 +164,18 @@ function buildEvaluatorPrompt(ctx) {
|
|
|
162
164
|
const stepsSection = ctx.taskSteps.length > 0 ? `
|
|
163
165
|
**Implementation Steps:**
|
|
164
166
|
${ctx.taskSteps.map((s) => `- ${s}`).join("\n")}` : "";
|
|
167
|
+
const criteriaSection = ctx.verificationCriteria.length > 0 ? `
|
|
168
|
+
**Verification Criteria:**
|
|
169
|
+
${ctx.verificationCriteria.map((c) => `- ${c}`).join("\n")}` : "";
|
|
165
170
|
const checkSection = ctx.checkScriptSection ? `
|
|
166
171
|
|
|
167
172
|
${ctx.checkScriptSection}` : "";
|
|
168
|
-
return template.replaceAll("{{TASK_NAME}}", ctx.taskName).replace("{{TASK_DESCRIPTION_SECTION}}", descriptionSection).replace("{{TASK_STEPS_SECTION}}", stepsSection).replace("{{PROJECT_PATH}}", ctx.projectPath).replace("{{CHECK_SCRIPT_SECTION}}", checkSection);
|
|
173
|
+
return template.replaceAll("{{TASK_NAME}}", ctx.taskName).replace("{{TASK_DESCRIPTION_SECTION}}", descriptionSection).replace("{{TASK_STEPS_SECTION}}", stepsSection).replace("{{VERIFICATION_CRITERIA_SECTION}}", criteriaSection).replace("{{PROJECT_PATH}}", ctx.projectPath).replace("{{CHECK_SCRIPT_SECTION}}", checkSection).replace("{{PROJECT_TOOLING_SECTION}}", ctx.projectToolingSection);
|
|
174
|
+
}
|
|
175
|
+
/**
 * Build the prompt sent to the generator when it must fix issues raised by the evaluator.
 *
 * Loads the "task-evaluation-resume" template and fills in the `{{CRITIQUE}}` and
 * `{{COMMIT_INSTRUCTION}}` placeholders.
 *
 * @param {{ critique: string, needsCommit: boolean }} ctx - critique text and whether a commit step is required
 * @returns {string} the filled-in prompt
 */
function buildEvaluationResumePrompt(ctx) {
  const template = loadTemplate("task-evaluation-resume");
  const commitInstruction = ctx.needsCommit ? "\n - **Then commit the fix** with a descriptive message before signaling completion." : "";
  // Replacer functions are used on purpose: with a plain string replacement,
  // sequences such as `$&`, `$1` or `$$` inside the critique would be interpreted
  // as substitution patterns and silently corrupt the prompt.
  // The critique is inserted last so that its text is never re-scanned for placeholders.
  return template
    .replace("{{COMMIT_INSTRUCTION}}", () => commitInstruction)
    .replace("{{CRITIQUE}}", () => ctx.critique);
}
|
|
170
180
|
|
|
171
181
|
// src/utils/requirements-export.ts
|
|
@@ -1087,6 +1097,7 @@ async function addTask(input3, sprintId) {
|
|
|
1087
1097
|
name: input3.name,
|
|
1088
1098
|
description: input3.description,
|
|
1089
1099
|
steps: input3.steps ?? [],
|
|
1100
|
+
verificationCriteria: input3.verificationCriteria ?? [],
|
|
1090
1101
|
status: "todo",
|
|
1091
1102
|
order: maxOrder + 1,
|
|
1092
1103
|
ticketId: input3.ticketId,
|
|
@@ -1159,6 +1170,12 @@ async function updateTask(taskId, updates, sprintId) {
|
|
|
1159
1170
|
if (updates.evaluationOutput !== void 0) {
|
|
1160
1171
|
task.evaluationOutput = updates.evaluationOutput;
|
|
1161
1172
|
}
|
|
1173
|
+
if (updates.evaluationStatus !== void 0) {
|
|
1174
|
+
task.evaluationStatus = updates.evaluationStatus;
|
|
1175
|
+
}
|
|
1176
|
+
if (updates.evaluationFile !== void 0) {
|
|
1177
|
+
task.evaluationFile = updates.evaluationFile;
|
|
1178
|
+
}
|
|
1162
1179
|
await saveTasks(tasks, id);
|
|
1163
1180
|
return task;
|
|
1164
1181
|
});
|
|
@@ -1320,6 +1337,7 @@ function validateImportTasks(importTasks2, existingTasks, ticketIds) {
|
|
|
1320
1337
|
name: t.name,
|
|
1321
1338
|
description: void 0,
|
|
1322
1339
|
steps: [],
|
|
1340
|
+
verificationCriteria: [],
|
|
1323
1341
|
status: "todo",
|
|
1324
1342
|
order: existingTasks.length + i + 1,
|
|
1325
1343
|
ticketId: void 0,
|
|
@@ -1355,7 +1373,7 @@ async function selectProject(message = "Select project:") {
|
|
|
1355
1373
|
default: true
|
|
1356
1374
|
});
|
|
1357
1375
|
if (create) {
|
|
1358
|
-
const { projectAddCommand } = await import("./add-
|
|
1376
|
+
const { projectAddCommand } = await import("./add-3T225IX5.mjs");
|
|
1359
1377
|
await projectAddCommand({ interactive: true });
|
|
1360
1378
|
const updated = await listProjects();
|
|
1361
1379
|
if (updated.length === 0) return null;
|
|
@@ -1428,7 +1446,7 @@ async function selectSprint(message = "Select sprint:", filter) {
|
|
|
1428
1446
|
default: true
|
|
1429
1447
|
});
|
|
1430
1448
|
if (create) {
|
|
1431
|
-
const { sprintCreateCommand } = await import("./create-
|
|
1449
|
+
const { sprintCreateCommand } = await import("./create-MYGOWO2F.mjs");
|
|
1432
1450
|
await sprintCreateCommand({ interactive: true });
|
|
1433
1451
|
const updated = await listSprints();
|
|
1434
1452
|
const refiltered = filter ? updated.filter((s) => filter.includes(s.status)) : updated;
|
|
@@ -1463,7 +1481,7 @@ async function selectTicket(message = "Select ticket:", filter) {
|
|
|
1463
1481
|
default: true
|
|
1464
1482
|
});
|
|
1465
1483
|
if (create) {
|
|
1466
|
-
const { ticketAddCommand } = await import("./add-
|
|
1484
|
+
const { ticketAddCommand } = await import("./add-6A5432U2.mjs");
|
|
1467
1485
|
await ticketAddCommand({ interactive: true });
|
|
1468
1486
|
const updated = await listTickets();
|
|
1469
1487
|
const refiltered = filter ? updated.filter(filter) : updated;
|
|
@@ -1658,6 +1676,7 @@ async function importTasksReplace(tasks, sprintId) {
|
|
|
1658
1676
|
name: taskInput.name,
|
|
1659
1677
|
description: taskInput.description,
|
|
1660
1678
|
steps: taskInput.steps ?? [],
|
|
1679
|
+
verificationCriteria: taskInput.verificationCriteria ?? [],
|
|
1661
1680
|
status: "todo",
|
|
1662
1681
|
order: newTasks.length + 1,
|
|
1663
1682
|
ticketId: taskInput.ticketId,
|
|
@@ -2053,15 +2072,165 @@ async function sprintPlanCommand(args) {
|
|
|
2053
2072
|
}
|
|
2054
2073
|
|
|
2055
2074
|
// src/commands/sprint/start.ts
|
|
2056
|
-
import { Result as
|
|
2075
|
+
import { Result as Result10 } from "typescript-result";
|
|
2057
2076
|
|
|
2058
2077
|
// src/ai/runner.ts
|
|
2059
2078
|
import { confirm as confirm5, input as input2, select as select2 } from "@inquirer/prompts";
|
|
2060
|
-
import { Result as
|
|
2079
|
+
import { Result as Result9 } from "typescript-result";
|
|
2061
2080
|
|
|
2062
2081
|
// src/ai/executor.ts
|
|
2063
2082
|
import { confirm as confirm4 } from "@inquirer/prompts";
|
|
2064
2083
|
import { readFile as readFile4, unlink as unlink2 } from "fs/promises";
|
|
2084
|
+
import { Result as Result8 } from "typescript-result";
|
|
2085
|
+
|
|
2086
|
+
// src/utils/git.ts
|
|
2087
|
+
import { spawnSync as spawnSync2 } from "child_process";
|
|
2088
|
+
// Allow-list of characters accepted anywhere in a branch name.
var BRANCH_NAME_RE = /^[a-zA-Z0-9/_.-]+$/;
// Structural rules that reject names git itself refuses (see git-check-ref-format).
var BRANCH_NAME_INVALID_PATTERNS = [
  /\.\./, // no ".." anywhere
  /\.$/, // must not end with "."
  /\/$/, // must not end with "/"
  /^\//, // must not start with "/"
  /\/\//, // no empty path components
  /^-/, // must not look like a command-line option
  /(?:^|\/)\./, // no path component may start with "."
  /\.lock(?:\/|$)/ // no path component may end with ".lock"
];
/**
 * Check whether `name` is safe to pass to git as a local branch name.
 *
 * @param {string} name - candidate branch name
 * @returns {boolean} true when the name is a non-empty string of at most 250
 *   characters, uses only allow-listed characters, and violates none of the
 *   structural rules above
 */
function isValidBranchName(name) {
  if (typeof name !== "string" || name.length === 0 || name.length > 250) return false;
  if (!BRANCH_NAME_RE.test(name)) return false;
  return !BRANCH_NAME_INVALID_PATTERNS.some((pattern) => pattern.test(name));
}
|
|
2098
|
+
/**
 * Return the output of `git rev-parse --abbrev-ref HEAD` run in `cwd`.
 *
 * @param {string} cwd - working directory; validated with assertSafeCwd first
 * @returns {string} trimmed stdout of the git command
 * @throws {Error} when git exits non-zero or could not be spawned
 */
function getCurrentBranch(cwd) {
  assertSafeCwd(cwd);
  const result = spawnSync2("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"]
  });
  if (result.status !== 0) {
    // When the process cannot be spawned at all (e.g. git is not installed),
    // stderr is null and the reason is carried on result.error instead.
    const detail = (result.stderr ?? "").trim() || result.error?.message || "";
    throw new Error(`Failed to get current branch in ${cwd}: ${detail}`);
  }
  return result.stdout.trim();
}
|
|
2110
|
+
/**
 * Report whether a local branch ref exists, using `git show-ref --verify refs/heads/<name>`.
 *
 * @param {string} cwd - working directory; validated with assertSafeCwd first
 * @param {string} name - branch name; must pass isValidBranchName
 * @returns {boolean} true when git exits with status 0
 * @throws {Error} when the branch name is invalid
 */
function branchExists(cwd, name) {
  assertSafeCwd(cwd);
  const nameIsValid = isValidBranchName(name);
  if (!nameIsValid) {
    throw new Error(`Invalid branch name: ${name}`);
  }
  const ref = `refs/heads/${name}`;
  const spawnOptions = { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] };
  const outcome = spawnSync2("git", ["show-ref", "--verify", ref], spawnOptions);
  return outcome.status === 0;
}
|
|
2122
|
+
/**
 * Ensure the repository at `cwd` is on branch `name`.
 *
 * Does nothing when `name` is already the current branch. Otherwise checks out
 * the branch if it exists locally, or creates it with `git checkout -b`.
 *
 * @param {string} cwd - working directory; validated with assertSafeCwd first
 * @param {string} name - branch name; must pass isValidBranchName
 * @returns {void}
 * @throws {Error} when the name is invalid, or when git fails or cannot be spawned
 */
function createAndCheckoutBranch(cwd, name) {
  assertSafeCwd(cwd);
  if (!isValidBranchName(name)) {
    throw new Error(`Invalid branch name: ${name}`);
  }
  if (getCurrentBranch(cwd) === name) {
    return;
  }
  const alreadyExists = branchExists(cwd, name);
  const gitArgs = alreadyExists ? ["checkout", name] : ["checkout", "-b", name];
  const result = spawnSync2("git", gitArgs, {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"]
  });
  if (result.status !== 0) {
    const action = alreadyExists ? "checkout" : "create";
    // stderr is null when the process could not be spawned; fall back to the
    // spawn error so the failure is reported instead of crashing on `.trim()`.
    const detail = (result.stderr ?? "").trim() || result.error?.message || "";
    throw new Error(`Failed to ${action} branch '${name}' in ${cwd}: ${detail}`);
  }
}
|
|
2151
|
+
/**
 * Check whether the branch reported by getCurrentBranch for `cwd` equals `expected`.
 *
 * @param {string} cwd - working directory passed to getCurrentBranch
 * @param {string} expected - branch name to compare against
 * @returns {boolean} true on an exact match
 */
function verifyCurrentBranch(cwd, expected) {
  return getCurrentBranch(cwd) === expected;
}
|
|
2155
|
+
/**
 * Detect the repository's default branch.
 *
 * Resolves `refs/remotes/origin/HEAD` via `git symbolic-ref`. When that ref is
 * missing or not symbolic, falls back to a local "main", then "master", then
 * the literal "main".
 *
 * @param {string} cwd - working directory; validated with assertSafeCwd first
 * @returns {string} the default branch name
 * @throws {Error} when git fails for any other reason or cannot be spawned
 */
function getDefaultBranch(cwd) {
  assertSafeCwd(cwd);
  const result = spawnSync2("git", ["symbolic-ref", "refs/remotes/origin/HEAD"], {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"]
  });
  if (result.status === 0) {
    const ref = result.stdout.trim();
    const remotePrefix = "refs/remotes/origin/";
    // Strip the remote prefix instead of keeping only the last path segment, so a
    // default branch that itself contains "/" (e.g. "release/2.x") is preserved.
    const branch = ref.startsWith(remotePrefix) ? ref.slice(remotePrefix.length) : ref.split("/").pop();
    return branch || "main";
  }
  // stderr is null when the process could not be spawned at all.
  const stderr = (result.stderr ?? "").trim();
  if (stderr.includes("is not a symbolic ref") || stderr.includes("No such ref")) {
    if (branchExists(cwd, "main")) return "main";
    if (branchExists(cwd, "master")) return "master";
    return "main";
  }
  throw new Error(`Failed to detect default branch in ${cwd}: ${stderr || result.error?.message || ""}`);
}
|
|
2175
|
+
/**
 * Best-effort lookup of the output of `git rev-parse HEAD` in `cwd`.
 *
 * Never throws: any failure (unsafe cwd, non-zero git exit, empty output,
 * unexpected exception) yields null.
 *
 * @param {string} cwd - working directory
 * @returns {string | null} trimmed stdout, or null when unavailable
 */
function getHeadSha(cwd) {
  let sha = null;
  try {
    assertSafeCwd(cwd);
    const outcome = spawnSync2("git", ["rev-parse", "HEAD"], {
      cwd,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    if (outcome.status === 0) {
      const trimmed = outcome.stdout.trim();
      sha = trimmed || null;
    }
  } catch {
    sha = null;
  }
  return sha;
}
|
|
2189
|
+
/**
 * Report whether `git status --porcelain` prints anything for `cwd`.
 *
 * @param {string} cwd - working directory; validated with assertSafeCwd first
 * @returns {boolean} true when the porcelain output is non-empty
 * @throws {Error} when git exits non-zero or could not be spawned
 */
function hasUncommittedChanges(cwd) {
  assertSafeCwd(cwd);
  const result = spawnSync2("git", ["status", "--porcelain"], {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"]
  });
  if (result.status !== 0) {
    // stderr is null when the process could not be spawned; report the spawn
    // error rather than crashing on `.trim()`.
    const detail = (result.stderr ?? "").trim() || result.error?.message || "";
    throw new Error(`Failed to check git status in ${cwd}: ${detail}`);
  }
  return result.stdout.trim().length > 0;
}
|
|
2201
|
+
/**
 * Derive the branch name for a sprint: the sprint id under the "ralphctl/" prefix.
 *
 * @param {string} sprintId - sprint identifier
 * @returns {string} `ralphctl/<sprintId>`
 */
function generateBranchName(sprintId) {
  const namespace = "ralphctl";
  return `${namespace}/${sprintId}`;
}
|
|
2204
|
+
/**
 * Probe for the `gh` command by running `gh --version`.
 *
 * @returns {boolean} true when the command exits with status 0
 */
function isGhAvailable() {
  const probeOptions = { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] };
  const probe = spawnSync2("gh", ["--version"], probeOptions);
  return probe.status === 0;
}
|
|
2211
|
+
/**
 * Probe for the `glab` command by running `glab --version`.
 *
 * @returns {boolean} true when the command exits with status 0
 */
function isGlabAvailable() {
  const probeOptions = { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] };
  const probe = spawnSync2("glab", ["--version"], probeOptions);
  return probe.status === 0;
}
|
|
2218
|
+
|
|
2219
|
+
// src/store/evaluation.ts
|
|
2220
|
+
/**
 * Append one evaluation entry to the task's evaluation file.
 *
 * The entry is a markdown heading with the current ISO timestamp, iteration
 * number and upper-cased status, followed by the body (trailing whitespace
 * removed) and a `---` separator.
 *
 * @param {string} sprintId - sprint identifier, used to resolve the file path
 * @param {string} taskId - task identifier, used to resolve the file path
 * @param {number} iteration - evaluation iteration number
 * @param {string} status - evaluation status; written upper-cased
 * @param {string} body - evaluation text
 * @returns {Promise<string>} path returned by getEvaluationFilePath
 */
async function writeEvaluation(sprintId, taskId, iteration, status, body) {
  const target = getEvaluationFilePath(sprintId, taskId);
  const stamp = (/* @__PURE__ */ new Date()).toISOString();
  const heading = `## ${stamp} \u2014 Iteration ${String(iteration)} \u2014 ${status.toUpperCase()}`;
  // heading, blank line, body, blank line, separator, blank line
  const record = [heading, "", body.trimEnd(), "", "---", "", ""].join("\n");
  const appended = await appendToFile(target, record);
  unwrapOrThrow(appended);
  return target;
}
|
|
2065
2234
|
|
|
2066
2235
|
// src/ai/parser.ts
|
|
2067
2236
|
function parseExecutionResult(output) {
|
|
@@ -2321,6 +2490,16 @@ function formatTask(ctx) {
|
|
|
2321
2490
|
lines.push(`${String(i + 1)}. ${step}`);
|
|
2322
2491
|
});
|
|
2323
2492
|
}
|
|
2493
|
+
if (ctx.task.verificationCriteria.length > 0) {
|
|
2494
|
+
lines.push("");
|
|
2495
|
+
lines.push("## Verification Criteria");
|
|
2496
|
+
lines.push("");
|
|
2497
|
+
lines.push("The task is done when all of the following are true:");
|
|
2498
|
+
lines.push("");
|
|
2499
|
+
ctx.task.verificationCriteria.forEach((criterion) => {
|
|
2500
|
+
lines.push(`- ${criterion}`);
|
|
2501
|
+
});
|
|
2502
|
+
}
|
|
2324
2503
|
return lines.join("\n");
|
|
2325
2504
|
}
|
|
2326
2505
|
function buildFullTaskContext(ctx, progressSummary, gitHistory, checkScript, checkStatus) {
|
|
@@ -2439,7 +2618,7 @@ function runPermissionCheck(ctx, noCommit, provider) {
|
|
|
2439
2618
|
}
|
|
2440
2619
|
|
|
2441
2620
|
// src/ai/lifecycle.ts
|
|
2442
|
-
import { spawnSync as
|
|
2621
|
+
import { spawnSync as spawnSync3 } from "child_process";
|
|
2443
2622
|
var DEFAULT_HOOK_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
2444
2623
|
function getHookTimeoutMs() {
|
|
2445
2624
|
const envVal = process.env["RALPHCTL_SETUP_TIMEOUT_MS"];
|
|
@@ -2452,7 +2631,7 @@ function getHookTimeoutMs() {
|
|
|
2452
2631
|
function runLifecycleHook(projectPath, script, event, timeoutOverrideMs) {
|
|
2453
2632
|
assertSafeCwd(projectPath);
|
|
2454
2633
|
const timeoutMs = timeoutOverrideMs ?? getHookTimeoutMs();
|
|
2455
|
-
const result =
|
|
2634
|
+
const result = spawnSync3(script, {
|
|
2456
2635
|
cwd: projectPath,
|
|
2457
2636
|
shell: true,
|
|
2458
2637
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -2464,7 +2643,142 @@ function runLifecycleHook(projectPath, script, event, timeoutOverrideMs) {
|
|
|
2464
2643
|
return { passed: result.status === 0, output };
|
|
2465
2644
|
}
|
|
2466
2645
|
|
|
2646
|
+
// src/ai/project-tooling.ts
|
|
2647
|
+
import { existsSync as existsSync3, readdirSync, readFileSync as readFileSync3 } from "fs";
|
|
2648
|
+
import { join as join8 } from "path";
|
|
2649
|
+
// Tooling record with no agents, skills, MCP servers or instruction files.
var EMPTY_TOOLING = {
  // name lists
  agents: [],
  skills: [],
  mcpServers: [],
  // instruction-file flags
  hasClaudeMd: false,
  hasAgentsMd: false,
  hasCopilotInstructions: false
};
|
|
2657
|
+
/**
 * List the entries of a directory that satisfy `predicate`, sorted.
 *
 * Never throws: a missing directory or any error while reading it yields [].
 *
 * @param {string} path - directory to list
 * @param {(name: string) => boolean} predicate - filter applied to each entry name
 * @returns {string[]} matching entry names in sorted order
 */
function safeListDir(path, predicate) {
  try {
    const entries = existsSync3(path) ? readdirSync(path) : [];
    const kept = entries.filter(predicate);
    kept.sort();
    return kept;
  } catch {
    return [];
  }
}
|
|
2665
|
+
// Agent names that are never reported by detectAgents.
var EVALUATOR_DENYLISTED_AGENTS = /* @__PURE__ */ new Set(["implementer", "planner"]);
/**
 * List agents defined as `.md` files under `<projectPath>/.claude/agents`.
 *
 * @param {string} projectPath - project root
 * @returns {string[]} agent names (file names without `.md`), excluding denylisted ones
 */
function detectAgents(projectPath) {
  const extension = ".md";
  const agentsDir = join8(projectPath, ".claude", "agents");
  const agentFiles = safeListDir(agentsDir, (fileName) => fileName.endsWith(extension));
  const agentNames = [];
  for (const fileName of agentFiles) {
    const agentName = fileName.slice(0, -extension.length);
    if (!EVALUATOR_DENYLISTED_AGENTS.has(agentName)) {
      agentNames.push(agentName);
    }
  }
  return agentNames;
}
|
|
2670
|
+
/**
 * List skills, i.e. the sub-directories of `<projectPath>/.claude/skills`.
 *
 * Never throws while reading: a missing directory or a read error yields [].
 *
 * @param {string} projectPath - project root
 * @returns {string[]} sorted directory names
 */
function detectSkills(projectPath) {
  const skillsDir = join8(projectPath, ".claude", "skills");
  try {
    if (!existsSync3(skillsDir)) {
      return [];
    }
    const skillNames = [];
    for (const entry of readdirSync(skillsDir, { withFileTypes: true })) {
      if (entry.isDirectory()) {
        skillNames.push(entry.name);
      }
    }
    return skillNames.sort();
  } catch {
    return [];
  }
}
|
|
2679
|
+
/**
 * List the MCP server names declared in `<projectPath>/.mcp.json`.
 *
 * Reads the `mcpServers` property and returns its keys. A missing file,
 * unreadable or invalid JSON, or an `mcpServers` value that is not a plain
 * key/value object all yield [].
 *
 * @param {string} projectPath - project root
 * @returns {string[]} sorted server names
 */
function detectMcpServers(projectPath) {
  const mcpFile = join8(projectPath, ".mcp.json");
  if (!existsSync3(mcpFile)) return [];
  try {
    const raw = readFileSync3(mcpFile, "utf-8");
    const parsed = JSON.parse(raw);
    const servers = parsed?.mcpServers;
    // Arrays are objects too; without this check their indices ("0", "1", ...)
    // would be reported as server names.
    if (!servers || typeof servers !== "object" || Array.isArray(servers)) return [];
    return Object.keys(servers).sort();
  } catch {
    return [];
  }
}
|
|
2692
|
+
/**
 * Collect the tooling found in a project directory.
 *
 * @param {string} projectPath - project root
 * @returns {object} EMPTY_TOOLING when the path is empty or does not exist;
 *   otherwise a record with the detected agents, skills and MCP servers, plus
 *   flags for CLAUDE.md, AGENTS.md and .github/copilot-instructions.md
 */
function detectProjectTooling(projectPath) {
  const projectIsUsable = Boolean(projectPath) && existsSync3(projectPath);
  if (!projectIsUsable) {
    return EMPTY_TOOLING;
  }
  const hasProjectFile = (...segments) => existsSync3(join8(projectPath, ...segments));
  const agents = detectAgents(projectPath);
  const skills = detectSkills(projectPath);
  const mcpServers = detectMcpServers(projectPath);
  const hasClaudeMd = hasProjectFile("CLAUDE.md");
  const hasAgentsMd = hasProjectFile("AGENTS.md");
  const hasCopilotInstructions = hasProjectFile(".github", "copilot-instructions.md");
  return { agents, skills, mcpServers, hasClaudeMd, hasAgentsMd, hasCopilotInstructions };
}
|
|
2705
|
+
/**
 * Render the "Project Tooling" markdown section for a prompt.
 *
 * @param {object} tooling - record with `agents`, `skills`, `mcpServers` arrays and
 *   `hasClaudeMd`, `hasAgentsMd`, `hasCopilotInstructions` flags
 * @returns {string} the markdown section, or "" when nothing was detected
 */
function renderProjectToolingSection(tooling) {
  const { agents, skills, mcpServers } = tooling;
  const nothingDetected = !(agents.length > 0) && !(skills.length > 0) && !(mcpServers.length > 0) && !tooling.hasClaudeMd && !tooling.hasAgentsMd && !tooling.hasCopilotInstructions;
  if (nothingDetected) {
    return "";
  }
  // One bullet per entry, with an optional hint after an em dash.
  const bullet = (name, hint) => `- \`${name}\`${hint ? ` \u2014 ${hint}` : ""}`;
  const out = [
    "## Project Tooling (use these \u2014 they exist for a reason)",
    "",
    "This project ships with tooling that you should prefer over generic approaches. Verification and evaluation must adapt to the project\u2019s actual stack and the agents, skills, and MCP servers it has installed.",
    ""
  ];
  if (agents.length > 0) {
    out.push(
      "### Subagents available",
      "",
      "Delegate via the Task tool with `subagent_type=<name>` when the diff matches a specialty:",
      ...agents.map((agent) => bullet(agent, describeAgentHint(agent))),
      ""
    );
  }
  if (skills.length > 0) {
    out.push(
      "### Skills available",
      "",
      "Invoke via the Skill tool when the skill name matches the work in front of you:",
      ...skills.map((skill) => `- \`${skill}\``),
      ""
    );
  }
  if (mcpServers.length > 0) {
    out.push(
      "### MCP servers available",
      "",
      "These give you tools beyond the filesystem. Use them to **interact with the running system**, not just read its source.",
      ...mcpServers.map((server) => bullet(server, describeMcpHint(server))),
      ""
    );
  }
  const instructionFiles = [
    [tooling.hasClaudeMd, "`CLAUDE.md`"],
    [tooling.hasAgentsMd, "`AGENTS.md`"],
    [tooling.hasCopilotInstructions, "`.github/copilot-instructions.md`"]
  ].filter(([present]) => present).map(([, label]) => label);
  if (instructionFiles.length > 0) {
    out.push(
      "### Project instructions",
      "",
      `Read ${instructionFiles.join(" / ")} for project-specific verification commands, conventions, and constraints. If no check script is configured, derive verification commands from these files (e.g. \`package.json\` scripts referenced there).`,
      ""
    );
  }
  return out.join("\n");
}
|
|
2760
|
+
/**
 * Return a short usage hint for a well-known agent name.
 *
 * @param {string} name - agent name
 * @returns {string | null} the hint, or null when the name is not one of the known agents
 */
function describeAgentHint(name) {
  // A Map is used instead of a plain object so that names such as "constructor"
  // or "toString" do not resolve to inherited Object.prototype members.
  const hints = /* @__PURE__ */ new Map([
    ["auditor", "use for security-sensitive diffs (auth, input handling, file IO, secrets)"],
    ["reviewer", "use for general code-quality review of the diff"],
    ["tester", "use to assess test coverage and quality of new tests"],
    ["designer", "use for UI/UX/theming changes"]
  ]);
  return hints.get(name) ?? null;
}
|
|
2769
|
+
/**
 * Return a short usage hint for an MCP server, matched case-insensitively by
 * substrings of its name.
 *
 * @param {string} name - MCP server name
 * @returns {string | null} the hint of the first matching rule, or null when none matches
 */
function describeMcpHint(name) {
  // Rules are checked in order; the first rule with a matching keyword wins.
  const rules = [
    [["playwright"], "use for any UI/frontend task \u2014 click through the changed flow"],
    [["puppeteer"], "use for browser automation on UI changes"],
    [["github"], "use to inspect related PRs/issues for context"],
    [["postgres", "mysql", "sqlite"], "use to verify database schema/migration changes against a real DB"]
  ];
  const lowered = name.toLowerCase();
  for (const [keywords, hint] of rules) {
    if (keywords.some((keyword) => lowered.includes(keyword))) {
      return hint;
    }
  }
  return null;
}
|
|
2779
|
+
|
|
2467
2780
|
// src/ai/evaluator.ts
|
|
2781
|
+
var EVALUATOR_MAX_TURNS = 100;
|
|
2468
2782
|
function getEvaluatorModel(generatorModel, provider) {
|
|
2469
2783
|
if (provider.name !== "claude" || !generatorModel) return null;
|
|
2470
2784
|
const modelLower = generatorModel.toLowerCase();
|
|
@@ -2472,54 +2786,92 @@ function getEvaluatorModel(generatorModel, provider) {
|
|
|
2472
2786
|
if (modelLower.includes("sonnet")) return "claude-haiku-4-5";
|
|
2473
2787
|
return "claude-haiku-4-5";
|
|
2474
2788
|
}
|
|
2789
|
+
// Review dimensions, in the order they are reported.
var DIMENSION_NAMES = ["correctness", "completeness", "safety", "consistency"];
// One case-insensitive pattern per dimension, matching
// `**<dimension>**: PASS|FAIL <em dash or hyphen> <finding>`.
var DIMENSION_PATTERNS = Object.fromEntries(
  DIMENSION_NAMES.map((dimension) => [
    dimension,
    new RegExp(`\\*\\*${dimension}\\*\\*\\s*:\\s*(PASS|FAIL)\\s*(?:—|-)\\s*(.+)`, "i")
  ])
);
/**
 * Extract per-dimension PASS/FAIL verdicts from evaluator output.
 *
 * @param {string} output - raw evaluator output
 * @returns {{ dimension: string, passed: boolean, finding: string }[]} one entry per
 *   dimension whose line was found, in DIMENSION_NAMES order
 */
function parseDimensionScores(output) {
  return DIMENSION_NAMES.flatMap((dimension) => {
    const hit = DIMENSION_PATTERNS[dimension].exec(output);
    const verdict = hit?.[1];
    const finding = hit?.[2];
    if (!verdict || !finding) {
      return [];
    }
    return [{ dimension, passed: verdict.toUpperCase() === "PASS", finding: finding.trim() }];
  });
}
|
|
2475
2810
|
function parseEvaluationResult(output) {
|
|
2811
|
+
const dimensions = parseDimensionScores(output);
|
|
2476
2812
|
if (output.includes("<evaluation-passed>")) {
|
|
2477
|
-
return { passed: true, output };
|
|
2813
|
+
return { passed: true, status: "passed", output, dimensions };
|
|
2478
2814
|
}
|
|
2479
2815
|
const failedMatch = /<evaluation-failed>([\s\S]*?)<\/evaluation-failed>/.exec(output);
|
|
2480
2816
|
if (failedMatch) {
|
|
2481
|
-
return { passed: false, output: failedMatch[1]?.trim() ?? output };
|
|
2817
|
+
return { passed: false, status: "failed", output: failedMatch[1]?.trim() ?? output, dimensions };
|
|
2482
2818
|
}
|
|
2483
|
-
|
|
2819
|
+
if (dimensions.length > 0) {
|
|
2820
|
+
return { passed: false, status: "failed", output, dimensions };
|
|
2821
|
+
}
|
|
2822
|
+
return { passed: false, status: "malformed", output, dimensions };
|
|
2484
2823
|
}
|
|
2485
2824
|
function buildEvaluatorContext(task, checkScript) {
|
|
2486
|
-
const checkScriptSection = checkScript ? `## Check Script
|
|
2825
|
+
const checkScriptSection = checkScript ? `## Check Script (Computational Gate)
|
|
2487
2826
|
|
|
2488
|
-
|
|
2827
|
+
Run this check script as the **first step** of your review \u2014 it is the same gate the harness uses post-task:
|
|
2489
2828
|
|
|
2490
2829
|
\`\`\`
|
|
2491
2830
|
${checkScript}
|
|
2492
2831
|
\`\`\`
|
|
2493
2832
|
|
|
2494
|
-
|
|
2833
|
+
If this script fails, the implementation fails regardless of code quality. Record the full output.` : null;
|
|
2834
|
+
const tooling = detectProjectTooling(task.projectPath);
|
|
2835
|
+
const projectToolingSection = renderProjectToolingSection(tooling);
|
|
2495
2836
|
return {
|
|
2496
2837
|
taskName: task.name,
|
|
2497
2838
|
taskDescription: task.description ?? "",
|
|
2498
2839
|
taskSteps: task.steps,
|
|
2840
|
+
verificationCriteria: task.verificationCriteria,
|
|
2499
2841
|
projectPath: task.projectPath,
|
|
2500
|
-
checkScriptSection
|
|
2842
|
+
checkScriptSection,
|
|
2843
|
+
projectToolingSection
|
|
2501
2844
|
};
|
|
2502
2845
|
}
|
|
2503
|
-
async function runEvaluation(task, generatorModel, checkScript, sprintId, provider) {
|
|
2846
|
+
async function runEvaluation(task, generatorModel, checkScript, sprintId, provider, options) {
|
|
2504
2847
|
const p = provider ?? await getActiveProvider();
|
|
2505
2848
|
const evaluatorModel = getEvaluatorModel(generatorModel, p);
|
|
2506
2849
|
const sprintDir = getSprintDir(sprintId);
|
|
2507
2850
|
const ctx = buildEvaluatorContext(task, checkScript);
|
|
2508
2851
|
const prompt = buildEvaluatorPrompt(ctx);
|
|
2509
2852
|
const providerArgs = ["--add-dir", sprintDir];
|
|
2510
|
-
if (
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2853
|
+
if (p.name === "claude") {
|
|
2854
|
+
if (evaluatorModel) {
|
|
2855
|
+
providerArgs.push("--model", evaluatorModel);
|
|
2856
|
+
}
|
|
2857
|
+
providerArgs.push("--max-turns", String(EVALUATOR_MAX_TURNS));
|
|
2858
|
+
}
|
|
2859
|
+
await options?.coordinator?.waitIfPaused();
|
|
2860
|
+
const result = await spawnWithRetry(
|
|
2861
|
+
{
|
|
2862
|
+
cwd: task.projectPath,
|
|
2863
|
+
args: providerArgs,
|
|
2864
|
+
prompt,
|
|
2865
|
+
env: p.getSpawnEnv()
|
|
2866
|
+
},
|
|
2867
|
+
{ maxRetries: options?.maxRetries },
|
|
2868
|
+
p
|
|
2869
|
+
);
|
|
2519
2870
|
return parseEvaluationResult(result.stdout);
|
|
2520
2871
|
}
|
|
2521
2872
|
|
|
2522
2873
|
// src/ai/executor.ts
|
|
2874
|
+
var DEFAULT_MAX_TURNS = 200;
|
|
2523
2875
|
function buildProviderArgs(options, provider) {
|
|
2524
2876
|
if (provider.name !== "claude") {
|
|
2525
2877
|
if (options.maxBudgetUsd != null) {
|
|
@@ -2528,6 +2880,9 @@ function buildProviderArgs(options, provider) {
|
|
|
2528
2880
|
if (options.fallbackModel) {
|
|
2529
2881
|
console.log(warning(`--fallback-model is only supported with the Claude provider \u2014 ignored`));
|
|
2530
2882
|
}
|
|
2883
|
+
if (options.maxTurns != null) {
|
|
2884
|
+
console.log(warning(`--max-turns is only supported with the Claude provider \u2014 ignored`));
|
|
2885
|
+
}
|
|
2531
2886
|
return [];
|
|
2532
2887
|
}
|
|
2533
2888
|
const args = [];
|
|
@@ -2537,6 +2892,7 @@ function buildProviderArgs(options, provider) {
|
|
|
2537
2892
|
if (options.fallbackModel) {
|
|
2538
2893
|
args.push("--fallback-model", options.fallbackModel);
|
|
2539
2894
|
}
|
|
2895
|
+
args.push("--max-turns", String(options.maxTurns ?? DEFAULT_MAX_TURNS));
|
|
2540
2896
|
return args;
|
|
2541
2897
|
}
|
|
2542
2898
|
async function executeTask(ctx, options, sprintId, resumeSessionId, provider, checkStatus) {
|
|
@@ -2657,6 +3013,31 @@ async function executeTask(ctx, options, sprintId, resumeSessionId, provider, ch
|
|
|
2657
3013
|
return { ...parsed, sessionId: spawnResult.sessionId, model: spawnResult.model };
|
|
2658
3014
|
}
|
|
2659
3015
|
var MAX_EVAL_OUTPUT = 2e3;
|
|
3016
|
+
// Prefix placed at the start of evaluation output when the evaluator could not be spawned.
var EVAL_SPAWN_FAILURE_PREFIX = "Evaluator spawn failed:";
/**
 * Tell whether evaluation output begins with the spawn-failure prefix.
 *
 * @param {string} output - evaluation output text
 * @returns {boolean} true when the text starts with EVAL_SPAWN_FAILURE_PREFIX
 */
function isEvalSpawnFailure(output) {
  const startsWithMarker = output.startsWith(EVAL_SPAWN_FAILURE_PREFIX);
  return startsWithMarker;
}
|
|
3020
|
+
/**
 * Run the evaluator and turn any thrown error into a "malformed" result.
 *
 * On failure: when the error is a rate-limited SpawnError and a coordinator was
 * given, the coordinator is paused for `retryAfterMs` (60 s when unset); a
 * warning is logged; and a non-passing result whose output starts with
 * EVAL_SPAWN_FAILURE_PREFIX is returned.
 *
 * @param {object} task - task under evaluation (its `name` is used in the warning)
 * @param {string} generatorModel - forwarded to runEvaluation
 * @param {string} checkScript - forwarded to runEvaluation
 * @param {string} sprintId - forwarded to runEvaluation
 * @param {object} provider - forwarded to runEvaluation
 * @param {object} options - `maxRetries` is forwarded to runEvaluation
 * @param {object} [coordinator] - optional pause coordinator
 * @returns {Promise<object>} the evaluator's result, or the synthetic malformed result
 */
async function runEvaluationSafely(task, generatorModel, checkScript, sprintId, provider, options, coordinator) {
  const invokeEvaluator = () => {
    const evaluationOptions = { coordinator, maxRetries: options.maxRetries };
    return runEvaluation(task, generatorModel, checkScript, sprintId, provider, evaluationOptions);
  };
  const outcome = await wrapAsync(invokeEvaluator, ensureError);
  if (outcome.ok) {
    return outcome.value;
  }
  const failure = outcome.error;
  const shouldPause = failure instanceof SpawnError && failure.rateLimited && coordinator;
  if (shouldPause) {
    coordinator.pause(failure.retryAfterMs ?? 6e4);
  }
  console.log(warning(`Evaluator spawn failed for ${task.name}: ${failure.message} \u2014 marking malformed`));
  const syntheticOutput = `${EVAL_SPAWN_FAILURE_PREFIX} ${failure.message}`;
  return { passed: false, status: "malformed", output: syntheticOutput, dimensions: [] };
}
|
|
2660
3041
|
async function runEvaluationLoop(params) {
|
|
2661
3042
|
const {
|
|
2662
3043
|
task,
|
|
@@ -2667,29 +3048,38 @@ async function runEvaluationLoop(params) {
|
|
|
2667
3048
|
options,
|
|
2668
3049
|
evalIterations,
|
|
2669
3050
|
checkTimeout,
|
|
2670
|
-
useSpinner = false
|
|
3051
|
+
useSpinner = false,
|
|
3052
|
+
coordinator
|
|
2671
3053
|
} = params;
|
|
2672
3054
|
const evalCheckScript = getEffectiveCheckScript(project, task.projectPath);
|
|
2673
3055
|
const sprintDir = getSprintDir(sprintId);
|
|
2674
|
-
let evalResult = await
|
|
2675
|
-
|
|
2676
|
-
|
|
3056
|
+
let evalResult = await runEvaluationSafely(
|
|
3057
|
+
task,
|
|
3058
|
+
result.model,
|
|
3059
|
+
evalCheckScript,
|
|
3060
|
+
sprintId,
|
|
3061
|
+
provider,
|
|
3062
|
+
options,
|
|
3063
|
+
coordinator
|
|
3064
|
+
);
|
|
3065
|
+
let evaluationFile = await tryWriteEvaluationEntry(sprintId, task, 1, evalResult);
|
|
3066
|
+
let currentSessionId = result.sessionId;
|
|
3067
|
+
let currentModel = result.model;
|
|
3068
|
+
for (let i = 0; i < evalIterations && !evalResult.passed && evalResult.status !== "malformed"; i++) {
|
|
3069
|
+
console.log(warning(`Evaluation failed for ${task.name} \u2014 fix attempt ${String(i + 1)}/${String(evalIterations)}`));
|
|
2677
3070
|
console.log(muted(evalResult.output.slice(0, 500)));
|
|
3071
|
+
const headBefore = getHeadSha(task.projectPath);
|
|
3072
|
+
const resumePrompt = buildEvaluationResumePrompt({
|
|
3073
|
+
critique: evalResult.output,
|
|
3074
|
+
needsCommit: !options.noCommit
|
|
3075
|
+
});
|
|
2678
3076
|
const resumeSpinner = useSpinner ? createSpinner(`Fixing evaluation issues: ${task.name}`).start() : null;
|
|
2679
3077
|
const resumeResult = await spawnWithRetry(
|
|
2680
3078
|
{
|
|
2681
3079
|
cwd: task.projectPath,
|
|
2682
3080
|
args: ["--add-dir", sprintDir, ...buildProviderArgs(options, provider)],
|
|
2683
|
-
prompt:
|
|
2684
|
-
|
|
2685
|
-
${evalResult.output}
|
|
2686
|
-
|
|
2687
|
-
Review the critique carefully. Fix each identified issue in the code, then:
|
|
2688
|
-
1. Re-run verification commands to confirm the fix
|
|
2689
|
-
${options.noCommit ? "" : "2. Commit the fix with a descriptive message\n"}${options.noCommit ? "2" : "3"}. Signal completion with <task-verified> and <task-complete>
|
|
2690
|
-
|
|
2691
|
-
If the critique is about something outside your task scope, fix only what is within scope and signal completion.`,
|
|
2692
|
-
resumeSessionId: result.sessionId ?? void 0,
|
|
3081
|
+
prompt: resumePrompt,
|
|
3082
|
+
resumeSessionId: currentSessionId ?? void 0,
|
|
2693
3083
|
env: provider.getSpawnEnv()
|
|
2694
3084
|
},
|
|
2695
3085
|
{
|
|
@@ -2703,37 +3093,88 @@ If the critique is about something outside your task scope, fix only what is wit
|
|
|
2703
3093
|
provider
|
|
2704
3094
|
);
|
|
2705
3095
|
resumeSpinner?.succeed(`Fix attempt completed: ${task.name}`);
|
|
3096
|
+
if (resumeResult.sessionId) currentSessionId = resumeResult.sessionId;
|
|
3097
|
+
if (resumeResult.model) currentModel = resumeResult.model;
|
|
2706
3098
|
const fixResult = parseExecutionResult(resumeResult.stdout);
|
|
2707
3099
|
if (!fixResult.success) {
|
|
2708
|
-
|
|
3100
|
+
const reason = `Generator could not fix issues after feedback (no <task-complete> signal)`;
|
|
3101
|
+
console.log(warning(`${reason}: ${task.name}`));
|
|
3102
|
+
const stubPath = await tryWriteEvaluationStub(sprintId, task, i + 2, reason);
|
|
3103
|
+
if (stubPath) evaluationFile = stubPath;
|
|
3104
|
+
break;
|
|
3105
|
+
}
|
|
3106
|
+
const headAfter = getHeadSha(task.projectPath);
|
|
3107
|
+
const dirtyR = Result8.try(() => hasUncommittedChanges(task.projectPath));
|
|
3108
|
+
const dirty = dirtyR.ok ? dirtyR.value : false;
|
|
3109
|
+
if (headBefore !== null && headAfter === headBefore && !dirty) {
|
|
3110
|
+
const reason = "Generator no-op (HEAD unchanged, no uncommitted changes)";
|
|
3111
|
+
console.log(warning(`${reason}: ${task.name}`));
|
|
3112
|
+
const stubPath = await tryWriteEvaluationStub(sprintId, task, i + 2, reason);
|
|
3113
|
+
if (stubPath) evaluationFile = stubPath;
|
|
2709
3114
|
break;
|
|
2710
3115
|
}
|
|
2711
3116
|
const recheckScript = getEffectiveCheckScript(project, task.projectPath);
|
|
2712
3117
|
if (recheckScript) {
|
|
2713
3118
|
const recheckResult = runLifecycleHook(task.projectPath, recheckScript, "taskComplete", checkTimeout);
|
|
2714
3119
|
if (!recheckResult.passed) {
|
|
3120
|
+
const reason = `Post-task check failed after generator fix: ${recheckResult.output.slice(0, 200)}`;
|
|
2715
3121
|
console.log(warning(`Post-task check failed after generator fix: ${task.name}`));
|
|
3122
|
+
const stubPath = await tryWriteEvaluationStub(sprintId, task, i + 2, reason);
|
|
3123
|
+
if (stubPath) evaluationFile = stubPath;
|
|
2716
3124
|
break;
|
|
2717
3125
|
}
|
|
2718
3126
|
}
|
|
2719
|
-
evalResult = await
|
|
3127
|
+
evalResult = await runEvaluationSafely(
|
|
3128
|
+
task,
|
|
3129
|
+
currentModel,
|
|
3130
|
+
evalCheckScript,
|
|
3131
|
+
sprintId,
|
|
3132
|
+
provider,
|
|
3133
|
+
options,
|
|
3134
|
+
coordinator
|
|
3135
|
+
);
|
|
3136
|
+
const entryPath = await tryWriteEvaluationEntry(sprintId, task, i + 2, evalResult);
|
|
3137
|
+
if (entryPath) evaluationFile = entryPath;
|
|
2720
3138
|
}
|
|
2721
3139
|
await updateTask(
|
|
2722
3140
|
task.id,
|
|
2723
3141
|
{
|
|
2724
3142
|
evaluated: true,
|
|
2725
|
-
|
|
3143
|
+
evaluationStatus: evalResult.status,
|
|
3144
|
+
evaluationOutput: evalResult.output.slice(0, MAX_EVAL_OUTPUT),
|
|
3145
|
+
...evaluationFile ? { evaluationFile } : {}
|
|
2726
3146
|
},
|
|
2727
3147
|
sprintId
|
|
2728
3148
|
);
|
|
2729
|
-
if (
|
|
3149
|
+
if (evalResult.status === "malformed") {
|
|
3150
|
+
const cause = isEvalSpawnFailure(evalResult.output) ? evalResult.output : "no signal, no dimensions";
|
|
3151
|
+
console.log(warning(`Evaluator output was malformed for ${task.name} (${cause}) \u2014 marking done`));
|
|
3152
|
+
} else if (!evalResult.passed) {
|
|
2730
3153
|
console.log(
|
|
2731
|
-
warning(`Evaluation did not pass after ${String(evalIterations)}
|
|
3154
|
+
warning(`Evaluation did not pass after ${String(evalIterations)} fix attempt(s) \u2014 marking done: ${task.name}`)
|
|
2732
3155
|
);
|
|
2733
3156
|
} else {
|
|
2734
3157
|
console.log(success(`Evaluation passed: ${task.name}`));
|
|
2735
3158
|
}
|
|
2736
3159
|
}
|
|
3160
|
+
async function tryWriteEvaluationEntry(sprintId, task, iteration, evalResult) {
|
|
3161
|
+
let body;
|
|
3162
|
+
if (evalResult.status === "malformed") {
|
|
3163
|
+
body = isEvalSpawnFailure(evalResult.output) ? evalResult.output : "_(evaluator output had no parseable signal \u2014 see executor stdout)_";
|
|
3164
|
+
} else {
|
|
3165
|
+
body = evalResult.output;
|
|
3166
|
+
}
|
|
3167
|
+
return tryWriteEvaluationRaw(sprintId, task, iteration, evalResult.status, body);
|
|
3168
|
+
}
|
|
3169
|
+
async function tryWriteEvaluationStub(sprintId, task, iteration, reason) {
|
|
3170
|
+
return tryWriteEvaluationRaw(sprintId, task, iteration, "failed", `_(no re-evaluation: ${reason})_`);
|
|
3171
|
+
}
|
|
3172
|
+
async function tryWriteEvaluationRaw(sprintId, task, iteration, status, body) {
|
|
3173
|
+
const writeR = await wrapAsync(() => writeEvaluation(sprintId, task.id, iteration, status, body), ensureError);
|
|
3174
|
+
if (writeR.ok) return writeR.value;
|
|
3175
|
+
console.log(warning(`Could not persist evaluation sidecar for ${task.name}: ${writeR.error.message}`));
|
|
3176
|
+
return null;
|
|
3177
|
+
}
|
|
2737
3178
|
async function areAllRemainingBlocked(sprintId) {
|
|
2738
3179
|
const remaining = await getRemainingTasks(sprintId);
|
|
2739
3180
|
if (remaining.length === 0) return false;
|
|
@@ -2878,9 +3319,10 @@ Starting ${label} in ${task.projectPath} (session)...
|
|
|
2878
3319
|
console.log(success("Verification: passed"));
|
|
2879
3320
|
}
|
|
2880
3321
|
const checkScript = getEffectiveCheckScript(project, task.projectPath);
|
|
3322
|
+
const sequentialRepo = project?.repositories.find((r) => r.path === task.projectPath);
|
|
2881
3323
|
if (checkScript) {
|
|
2882
3324
|
console.log(muted(`Running post-task check: ${checkScript}`));
|
|
2883
|
-
const hookResult = runLifecycleHook(task.projectPath, checkScript, "taskComplete");
|
|
3325
|
+
const hookResult = runLifecycleHook(task.projectPath, checkScript, "taskComplete", sequentialRepo?.checkTimeout);
|
|
2884
3326
|
if (!hookResult.passed) {
|
|
2885
3327
|
console.log(warning(`
|
|
2886
3328
|
Post-task check failed for: ${task.name}`));
|
|
@@ -2908,6 +3350,7 @@ Post-task check failed for: ${task.name}`));
|
|
|
2908
3350
|
provider,
|
|
2909
3351
|
options,
|
|
2910
3352
|
evalIterations,
|
|
3353
|
+
checkTimeout: sequentialRepo?.checkTimeout,
|
|
2911
3354
|
useSpinner: true
|
|
2912
3355
|
});
|
|
2913
3356
|
}
|
|
@@ -3241,7 +3684,8 @@ Post-task check failed for: ${settled.task.name}`));
|
|
|
3241
3684
|
provider,
|
|
3242
3685
|
options,
|
|
3243
3686
|
evalIterations,
|
|
3244
|
-
checkTimeout: taskRepo?.checkTimeout
|
|
3687
|
+
checkTimeout: taskRepo?.checkTimeout,
|
|
3688
|
+
coordinator
|
|
3245
3689
|
});
|
|
3246
3690
|
}
|
|
3247
3691
|
await updateTaskStatus(settled.task.id, "done", sprintId);
|
|
@@ -3320,125 +3764,6 @@ Waiting for ${String(running.size)} remaining task(s)...`));
|
|
|
3320
3764
|
};
|
|
3321
3765
|
}
|
|
3322
3766
|
|
|
3323
|
-
// src/utils/git.ts
|
|
3324
|
-
import { spawnSync as spawnSync3 } from "child_process";
|
|
3325
|
-
var BRANCH_NAME_RE = /^[a-zA-Z0-9/_.-]+$/;
|
|
3326
|
-
var BRANCH_NAME_INVALID_PATTERNS = [/\.\./, /\.$/, /\/$/, /\.lock$/, /^-/, /\/\//];
|
|
3327
|
-
function isValidBranchName(name) {
|
|
3328
|
-
if (!name || name.length > 250) return false;
|
|
3329
|
-
if (!BRANCH_NAME_RE.test(name)) return false;
|
|
3330
|
-
for (const pattern of BRANCH_NAME_INVALID_PATTERNS) {
|
|
3331
|
-
if (pattern.test(name)) return false;
|
|
3332
|
-
}
|
|
3333
|
-
return true;
|
|
3334
|
-
}
|
|
3335
|
-
function getCurrentBranch(cwd) {
|
|
3336
|
-
assertSafeCwd(cwd);
|
|
3337
|
-
const result = spawnSync3("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
|
|
3338
|
-
cwd,
|
|
3339
|
-
encoding: "utf-8",
|
|
3340
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3341
|
-
});
|
|
3342
|
-
if (result.status !== 0) {
|
|
3343
|
-
throw new Error(`Failed to get current branch in ${cwd}: ${result.stderr.trim()}`);
|
|
3344
|
-
}
|
|
3345
|
-
return result.stdout.trim();
|
|
3346
|
-
}
|
|
3347
|
-
function branchExists(cwd, name) {
|
|
3348
|
-
assertSafeCwd(cwd);
|
|
3349
|
-
if (!isValidBranchName(name)) {
|
|
3350
|
-
throw new Error(`Invalid branch name: ${name}`);
|
|
3351
|
-
}
|
|
3352
|
-
const result = spawnSync3("git", ["show-ref", "--verify", `refs/heads/${name}`], {
|
|
3353
|
-
cwd,
|
|
3354
|
-
encoding: "utf-8",
|
|
3355
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3356
|
-
});
|
|
3357
|
-
return result.status === 0;
|
|
3358
|
-
}
|
|
3359
|
-
function createAndCheckoutBranch(cwd, name) {
|
|
3360
|
-
assertSafeCwd(cwd);
|
|
3361
|
-
if (!isValidBranchName(name)) {
|
|
3362
|
-
throw new Error(`Invalid branch name: ${name}`);
|
|
3363
|
-
}
|
|
3364
|
-
const current = getCurrentBranch(cwd);
|
|
3365
|
-
if (current === name) {
|
|
3366
|
-
return;
|
|
3367
|
-
}
|
|
3368
|
-
if (branchExists(cwd, name)) {
|
|
3369
|
-
const result = spawnSync3("git", ["checkout", name], {
|
|
3370
|
-
cwd,
|
|
3371
|
-
encoding: "utf-8",
|
|
3372
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3373
|
-
});
|
|
3374
|
-
if (result.status !== 0) {
|
|
3375
|
-
throw new Error(`Failed to checkout branch '${name}' in ${cwd}: ${result.stderr.trim()}`);
|
|
3376
|
-
}
|
|
3377
|
-
} else {
|
|
3378
|
-
const result = spawnSync3("git", ["checkout", "-b", name], {
|
|
3379
|
-
cwd,
|
|
3380
|
-
encoding: "utf-8",
|
|
3381
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3382
|
-
});
|
|
3383
|
-
if (result.status !== 0) {
|
|
3384
|
-
throw new Error(`Failed to create branch '${name}' in ${cwd}: ${result.stderr.trim()}`);
|
|
3385
|
-
}
|
|
3386
|
-
}
|
|
3387
|
-
}
|
|
3388
|
-
function verifyCurrentBranch(cwd, expected) {
|
|
3389
|
-
const current = getCurrentBranch(cwd);
|
|
3390
|
-
return current === expected;
|
|
3391
|
-
}
|
|
3392
|
-
function getDefaultBranch(cwd) {
|
|
3393
|
-
assertSafeCwd(cwd);
|
|
3394
|
-
const result = spawnSync3("git", ["symbolic-ref", "refs/remotes/origin/HEAD"], {
|
|
3395
|
-
cwd,
|
|
3396
|
-
encoding: "utf-8",
|
|
3397
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3398
|
-
});
|
|
3399
|
-
if (result.status === 0) {
|
|
3400
|
-
const ref = result.stdout.trim();
|
|
3401
|
-
const parts = ref.split("/");
|
|
3402
|
-
return parts[parts.length - 1] ?? "main";
|
|
3403
|
-
}
|
|
3404
|
-
const stderr = result.stderr.trim();
|
|
3405
|
-
if (stderr.includes("is not a symbolic ref") || stderr.includes("No such ref")) {
|
|
3406
|
-
if (branchExists(cwd, "main")) return "main";
|
|
3407
|
-
if (branchExists(cwd, "master")) return "master";
|
|
3408
|
-
return "main";
|
|
3409
|
-
}
|
|
3410
|
-
throw new Error(`Failed to detect default branch in ${cwd}: ${stderr}`);
|
|
3411
|
-
}
|
|
3412
|
-
function hasUncommittedChanges(cwd) {
|
|
3413
|
-
assertSafeCwd(cwd);
|
|
3414
|
-
const result = spawnSync3("git", ["status", "--porcelain"], {
|
|
3415
|
-
cwd,
|
|
3416
|
-
encoding: "utf-8",
|
|
3417
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3418
|
-
});
|
|
3419
|
-
if (result.status !== 0) {
|
|
3420
|
-
throw new Error(`Failed to check git status in ${cwd}: ${result.stderr.trim()}`);
|
|
3421
|
-
}
|
|
3422
|
-
return result.stdout.trim().length > 0;
|
|
3423
|
-
}
|
|
3424
|
-
function generateBranchName(sprintId) {
|
|
3425
|
-
return `ralphctl/${sprintId}`;
|
|
3426
|
-
}
|
|
3427
|
-
function isGhAvailable() {
|
|
3428
|
-
const result = spawnSync3("gh", ["--version"], {
|
|
3429
|
-
encoding: "utf-8",
|
|
3430
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3431
|
-
});
|
|
3432
|
-
return result.status === 0;
|
|
3433
|
-
}
|
|
3434
|
-
function isGlabAvailable() {
|
|
3435
|
-
const result = spawnSync3("glab", ["--version"], {
|
|
3436
|
-
encoding: "utf-8",
|
|
3437
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
3438
|
-
});
|
|
3439
|
-
return result.status === 0;
|
|
3440
|
-
}
|
|
3441
|
-
|
|
3442
3767
|
// src/ai/runner.ts
|
|
3443
3768
|
async function promptBranchStrategy(sprintId) {
|
|
3444
3769
|
const autoBranch = generateBranchName(sprintId);
|
|
@@ -3488,7 +3813,7 @@ async function ensureSprintBranches(sprintId, sprint, branchName) {
|
|
|
3488
3813
|
const uniquePaths = [...new Set(remainingTasks.map((t) => t.projectPath))];
|
|
3489
3814
|
if (uniquePaths.length === 0) return;
|
|
3490
3815
|
for (const projectPath of uniquePaths) {
|
|
3491
|
-
const uncommittedR =
|
|
3816
|
+
const uncommittedR = Result9.try(() => hasUncommittedChanges(projectPath));
|
|
3492
3817
|
if (!uncommittedR.ok) {
|
|
3493
3818
|
log.dim(` Skipping ${projectPath} \u2014 not a git repository`);
|
|
3494
3819
|
continue;
|
|
@@ -3500,7 +3825,7 @@ async function ensureSprintBranches(sprintId, sprint, branchName) {
|
|
|
3500
3825
|
}
|
|
3501
3826
|
}
|
|
3502
3827
|
for (const projectPath of uniquePaths) {
|
|
3503
|
-
const branchR =
|
|
3828
|
+
const branchR = Result9.try(() => {
|
|
3504
3829
|
const currentBranch = getCurrentBranch(projectPath);
|
|
3505
3830
|
if (currentBranch === branchName) {
|
|
3506
3831
|
log.dim(` Already on branch '${branchName}' in ${projectPath}`);
|
|
@@ -3521,7 +3846,7 @@ async function ensureSprintBranches(sprintId, sprint, branchName) {
|
|
|
3521
3846
|
}
|
|
3522
3847
|
}
|
|
3523
3848
|
function verifySprintBranch(projectPath, expectedBranch) {
|
|
3524
|
-
const r =
|
|
3849
|
+
const r = Result9.try(() => {
|
|
3525
3850
|
if (verifyCurrentBranch(projectPath, expectedBranch)) return true;
|
|
3526
3851
|
log.dim(` Branch mismatch in ${projectPath} \u2014 checking out '${expectedBranch}'`);
|
|
3527
3852
|
createAndCheckoutBranch(projectPath, expectedBranch);
|
|
@@ -3801,6 +4126,16 @@ function parseArgs3(args) {
|
|
|
3801
4126
|
throw new Error("Invalid model name \u2014 must be 1-100 alphanumeric characters, dots, hyphens, or underscores");
|
|
3802
4127
|
}
|
|
3803
4128
|
options.fallbackModel = modelStr;
|
|
4129
|
+
} else if (arg === "--max-turns") {
|
|
4130
|
+
const turnsStr = args[++i];
|
|
4131
|
+
if (!turnsStr) {
|
|
4132
|
+
throw new Error("--max-turns requires a number");
|
|
4133
|
+
}
|
|
4134
|
+
const turns = parseInt(turnsStr, 10);
|
|
4135
|
+
if (isNaN(turns) || turns <= 0) {
|
|
4136
|
+
throw new Error("--max-turns must be a positive integer");
|
|
4137
|
+
}
|
|
4138
|
+
options.maxTurns = turns;
|
|
3804
4139
|
} else if (arg === "--no-evaluate") {
|
|
3805
4140
|
options.noEvaluate = true;
|
|
3806
4141
|
} else if (!arg?.startsWith("-")) {
|
|
@@ -3810,7 +4145,7 @@ function parseArgs3(args) {
|
|
|
3810
4145
|
return { sprintId, options };
|
|
3811
4146
|
}
|
|
3812
4147
|
async function sprintStartCommand(args) {
|
|
3813
|
-
const parseR =
|
|
4148
|
+
const parseR = Result10.try(() => parseArgs3(args));
|
|
3814
4149
|
if (!parseR.ok) {
|
|
3815
4150
|
showError(parseR.error.message);
|
|
3816
4151
|
log.newline();
|