ralphctl 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/{add-TGJTRHIF.mjs → add-3T225IX5.mjs} +3 -3
- package/dist/{add-SEDQ3VK7.mjs → add-6A5432U2.mjs} +4 -4
- package/dist/{chunk-XPDI4SYI.mjs → chunk-742XQ7FL.mjs} +3 -3
- package/dist/{chunk-XQHEKKDN.mjs → chunk-DUU5346E.mjs} +1 -1
- package/dist/{chunk-LG6B7QVO.mjs → chunk-EUNAUHC3.mjs} +1 -1
- package/dist/{chunk-ZDEVRTGY.mjs → chunk-IB6OCKZW.mjs} +24 -2
- package/dist/{chunk-KPTPKLXY.mjs → chunk-JRFOUFD3.mjs} +1 -1
- package/dist/{chunk-XXIHDQOH.mjs → chunk-U62BX47C.mjs} +508 -173
- package/dist/{chunk-Q3VWJARJ.mjs → chunk-UBPZHHCD.mjs} +2 -2
- package/dist/cli.mjs +105 -16
- package/dist/{create-DJHCP7LN.mjs → create-MYGOWO2F.mjs} +3 -3
- package/dist/{handle-CCTBNAJZ.mjs → handle-TA4MYNQJ.mjs} +1 -1
- package/dist/{project-ZYGNPVGL.mjs → project-YONEJICR.mjs} +2 -2
- package/dist/prompts/ideate-auto.md +9 -5
- package/dist/prompts/ideate.md +28 -12
- package/dist/prompts/plan-auto.md +26 -16
- package/dist/prompts/plan-common.md +67 -22
- package/dist/prompts/plan-interactive.md +26 -27
- package/dist/prompts/task-evaluation-resume.md +22 -0
- package/dist/prompts/task-evaluation.md +146 -24
- package/dist/prompts/task-execution.md +58 -36
- package/dist/prompts/ticket-refine.md +24 -20
- package/dist/{resolver-L52KR4GY.mjs → resolver-RXEY6EJE.mjs} +2 -2
- package/dist/{sprint-LUXAV3Q3.mjs → sprint-FGLWYWKX.mjs} +2 -2
- package/dist/{wizard-D7N5WZ5H.mjs → wizard-HWOH2HPV.mjs} +6 -6
- package/package.json +6 -6
- package/schemas/task-import.schema.json +7 -0
- package/schemas/tasks.schema.json +18 -1
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "./chunk-7TG3EAQ2.mjs";
|
|
9
9
|
import {
|
|
10
10
|
createProject
|
|
11
|
-
} from "./chunk-LG6B7QVO.mjs";
|
|
11
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
12
12
|
import {
|
|
13
13
|
ensureError,
|
|
14
14
|
wrapAsync
|
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
import {
|
|
17
17
|
expandTilde,
|
|
18
18
|
validateProjectPath
|
|
19
|
-
} from "./chunk-ZDEVRTGY.mjs";
|
|
19
|
+
} from "./chunk-IB6OCKZW.mjs";
|
|
20
20
|
import {
|
|
21
21
|
IOError,
|
|
22
22
|
ProjectExistsError
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
addCheckScriptToRepository,
|
|
4
4
|
projectAddCommand
|
|
5
|
-
} from "./chunk-Q3VWJARJ.mjs";
|
|
5
|
+
} from "./chunk-UBPZHHCD.mjs";
|
|
6
6
|
import {
|
|
7
7
|
addTask,
|
|
8
8
|
areAllTasksDone,
|
|
@@ -52,13 +52,13 @@ import {
|
|
|
52
52
|
sprintStartCommand,
|
|
53
53
|
updateTaskStatus,
|
|
54
54
|
validateImportTasks
|
|
55
|
-
} from "./chunk-XXIHDQOH.mjs";
|
|
55
|
+
} from "./chunk-U62BX47C.mjs";
|
|
56
56
|
import {
|
|
57
57
|
escapableSelect
|
|
58
58
|
} from "./chunk-7LZ6GOGN.mjs";
|
|
59
59
|
import {
|
|
60
60
|
sprintCreateCommand
|
|
61
|
-
} from "./chunk-XQHEKKDN.mjs";
|
|
61
|
+
} from "./chunk-DUU5346E.mjs";
|
|
62
62
|
import {
|
|
63
63
|
addTicket,
|
|
64
64
|
allRequirementsApproved,
|
|
@@ -73,7 +73,7 @@ import {
|
|
|
73
73
|
removeTicket,
|
|
74
74
|
ticketAddCommand,
|
|
75
75
|
updateTicket
|
|
76
|
-
} from "./chunk-XPDI4SYI.mjs";
|
|
76
|
+
} from "./chunk-742XQ7FL.mjs";
|
|
77
77
|
import {
|
|
78
78
|
EXIT_ERROR,
|
|
79
79
|
exitWithCode
|
|
@@ -84,7 +84,7 @@ import {
|
|
|
84
84
|
listProjects,
|
|
85
85
|
removeProject,
|
|
86
86
|
removeProjectRepo
|
|
87
|
-
} from "./chunk-LG6B7QVO.mjs";
|
|
87
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
88
88
|
import {
|
|
89
89
|
DEFAULT_EVALUATION_ITERATIONS,
|
|
90
90
|
assertSprintStatus,
|
|
@@ -107,7 +107,7 @@ import {
|
|
|
107
107
|
setEditor,
|
|
108
108
|
setEvaluationIterations,
|
|
109
109
|
withFileLock
|
|
110
|
-
} from "./chunk-KPTPKLXY.mjs";
|
|
110
|
+
} from "./chunk-JRFOUFD3.mjs";
|
|
111
111
|
import {
|
|
112
112
|
ensureError,
|
|
113
113
|
wrapAsync
|
|
@@ -122,6 +122,7 @@ import {
|
|
|
122
122
|
TaskStatusSchema,
|
|
123
123
|
TasksSchema,
|
|
124
124
|
assertSafeCwd,
|
|
125
|
+
ensureDir,
|
|
125
126
|
expandTilde,
|
|
126
127
|
fileExists,
|
|
127
128
|
getDataDir,
|
|
@@ -133,7 +134,7 @@ import {
|
|
|
133
134
|
getTasksFilePath,
|
|
134
135
|
readValidatedJson,
|
|
135
136
|
validateProjectPath
|
|
136
|
-
} from "./chunk-ZDEVRTGY.mjs";
|
|
137
|
+
} from "./chunk-IB6OCKZW.mjs";
|
|
137
138
|
import {
|
|
138
139
|
DomainError,
|
|
139
140
|
NoCurrentSprintError,
|
|
@@ -3763,7 +3764,7 @@ async function interactiveMode() {
|
|
|
3763
3764
|
continue;
|
|
3764
3765
|
}
|
|
3765
3766
|
if (command === "wizard") {
|
|
3766
|
-
const { runWizard } = await import("./wizard-D7N5WZ5H.mjs");
|
|
3767
|
+
const { runWizard } = await import("./wizard-HWOH2HPV.mjs");
|
|
3767
3768
|
await runWizard();
|
|
3768
3769
|
continue;
|
|
3769
3770
|
}
|
|
@@ -3898,6 +3899,87 @@ async function sprintSwitchCommand() {
|
|
|
3898
3899
|
log.newline();
|
|
3899
3900
|
}
|
|
3900
3901
|
|
|
3902
|
+
// src/commands/sprint/insights.ts
|
|
3903
|
+
import { writeFile as writeFile2 } from "fs/promises";
|
|
3904
|
+
import { join as join5 } from "path";
|
|
3905
|
+
async function sprintInsightsCommand(args) {
|
|
3906
|
+
const exportFlag = args.includes("--export");
|
|
3907
|
+
const positionalArgs = args.filter((a) => !a.startsWith("--"));
|
|
3908
|
+
const sprintId = positionalArgs[0];
|
|
3909
|
+
const sprintR = await wrapAsync(async () => {
|
|
3910
|
+
if (sprintId) return getSprint(sprintId);
|
|
3911
|
+
return getCurrentSprintOrThrow();
|
|
3912
|
+
}, ensureError);
|
|
3913
|
+
if (!sprintR.ok) {
|
|
3914
|
+
showError(sprintR.error.message);
|
|
3915
|
+
return;
|
|
3916
|
+
}
|
|
3917
|
+
const sprint = sprintR.value;
|
|
3918
|
+
const tasks = await getTasks(sprint.id);
|
|
3919
|
+
printHeader(`Sprint Insights: ${sprint.name}`, icons.sprint);
|
|
3920
|
+
const evaluatedTasks = tasks.filter((t) => t.evaluated);
|
|
3921
|
+
if (evaluatedTasks.length === 0) {
|
|
3922
|
+
log.info("No evaluation data found for this sprint.");
|
|
3923
|
+
return;
|
|
3924
|
+
}
|
|
3925
|
+
const totalTasks = tasks.length;
|
|
3926
|
+
const evaluatedCount = evaluatedTasks.length;
|
|
3927
|
+
const withOutput = evaluatedTasks.filter((t) => t.evaluationOutput && t.evaluationOutput.trim().length > 0);
|
|
3928
|
+
console.log(` Tasks evaluated: ${colors.accent(String(evaluatedCount))} / ${String(totalTasks)} total`);
|
|
3929
|
+
log.newline();
|
|
3930
|
+
if (withOutput.length > 0) {
|
|
3931
|
+
console.log(` ${colors.accent("Evaluation output:")}`);
|
|
3932
|
+
for (const task of withOutput) {
|
|
3933
|
+
const output = task.evaluationOutput ?? "";
|
|
3934
|
+
const truncated = output.length > 200 ? output.slice(0, 200) + "..." : output;
|
|
3935
|
+
console.log(` ${icons.bullet} ${colors.accent(task.name)}: ${colors.muted(truncated)}`);
|
|
3936
|
+
}
|
|
3937
|
+
log.newline();
|
|
3938
|
+
}
|
|
3939
|
+
console.log(` ${colors.accent("Harness recommendations:")}`);
|
|
3940
|
+
if (withOutput.length > 1) {
|
|
3941
|
+
console.log(
|
|
3942
|
+
` ${icons.bullet} Consider reviewing evaluation failure patterns and updating CLAUDE.md with lessons learned.`
|
|
3943
|
+
);
|
|
3944
|
+
}
|
|
3945
|
+
if (withOutput.length > 0) {
|
|
3946
|
+
console.log(
|
|
3947
|
+
` ${icons.bullet} Run: ${colors.muted("ralphctl sprint insights --export")} to save details to $RALPHCTL_ROOT/insights/<sprint-id>.md`
|
|
3948
|
+
);
|
|
3949
|
+
}
|
|
3950
|
+
log.newline();
|
|
3951
|
+
if (exportFlag) {
|
|
3952
|
+
await exportInsights(sprint, tasks);
|
|
3953
|
+
}
|
|
3954
|
+
}
|
|
3955
|
+
async function exportInsights(sprint, tasks) {
|
|
3956
|
+
const dir = join5(getDataDir(), "insights");
|
|
3957
|
+
await ensureDir(dir);
|
|
3958
|
+
const filePath = join5(dir, `${sprint.id}.md`);
|
|
3959
|
+
const evaluatedCount = tasks.filter((t) => t.evaluated).length;
|
|
3960
|
+
const lines = [
|
|
3961
|
+
`# Sprint Insights: ${sprint.name}`,
|
|
3962
|
+
"",
|
|
3963
|
+
`**Date:** ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
3964
|
+
`**Sprint ID:** ${sprint.id}`,
|
|
3965
|
+
`**Tasks evaluated:** ${String(evaluatedCount)} / ${String(tasks.length)} total`,
|
|
3966
|
+
"",
|
|
3967
|
+
"## Evaluation Details"
|
|
3968
|
+
];
|
|
3969
|
+
for (const task of tasks) {
|
|
3970
|
+
lines.push("");
|
|
3971
|
+
lines.push(`### ${task.name} (${task.id})`);
|
|
3972
|
+
lines.push(`**Status:** ${task.status}`);
|
|
3973
|
+
lines.push(`**Evaluated:** ${task.evaluated ? "yes" : "no"}`);
|
|
3974
|
+
lines.push("");
|
|
3975
|
+
lines.push(task.evaluationOutput ?? "No evaluation output");
|
|
3976
|
+
lines.push("");
|
|
3977
|
+
lines.push("---");
|
|
3978
|
+
}
|
|
3979
|
+
await writeFile2(filePath, lines.join("\n"), "utf-8");
|
|
3980
|
+
log.success(`Insights exported to ${colors.accent(filePath)}`);
|
|
3981
|
+
}
|
|
3982
|
+
|
|
3901
3983
|
// src/commands/sprint/index.ts
|
|
3902
3984
|
function registerSprintCommands(program2) {
|
|
3903
3985
|
const sprint = program2.command("sprint").description("Manage sprints");
|
|
@@ -3974,7 +4056,13 @@ Examples:
|
|
|
3974
4056
|
sprint.command("health").description("Check sprint health").action(async () => {
|
|
3975
4057
|
await sprintHealthCommand();
|
|
3976
4058
|
});
|
|
3977
|
-
sprint.command("
|
|
4059
|
+
sprint.command("insights [id]").description("Analyze evaluation results and suggest improvements").option("--export", "Export insights to $RALPHCTL_ROOT/insights/<sprint-id>.md").action(async (id, opts) => {
|
|
4060
|
+
const args = [];
|
|
4061
|
+
if (id) args.push(id);
|
|
4062
|
+
if (opts?.export) args.push("--export");
|
|
4063
|
+
await sprintInsightsCommand(args);
|
|
4064
|
+
});
|
|
4065
|
+
sprint.command("start [id]").description("Run automated implementation loop").option("-s, --session", "Interactive AI session (collaborate with your AI provider)").option("-t, --step", "Step through tasks with approval between each").option("-c, --count <n>", "Limit to N tasks").option("--no-commit", "Skip automatic git commit after each task completes").option("--concurrency <n>", "Max parallel tasks (default: auto based on unique repos)").option("--max-retries <n>", "Max rate-limit retries per task (default: 5)").option("--fail-fast", "Stop launching new tasks on first failure").option("-f, --force", "Skip precondition checks (e.g., unplanned tickets)").option("--refresh-check", "Force re-run check scripts even if they already ran this sprint").option("-b, --branch", "Create sprint branch (ralphctl/<sprint-id>) in all repos").option("--branch-name <name>", "Use a custom branch name for sprint execution").option("--max-budget-usd <amount>", "Max USD budget per AI task (Claude only)").option("--fallback-model <model>", "Fallback model when primary is overloaded (Claude only)").option("--max-turns <number>", "Max agentic turns per task (Claude only, default: 200)").addHelpText(
|
|
3978
4066
|
"after",
|
|
3979
4067
|
`
|
|
3980
4068
|
Exit Codes:
|
|
@@ -4012,6 +4100,7 @@ Branch Management:
|
|
|
4012
4100
|
if (opts?.branchName) args.push("--branch-name", opts.branchName);
|
|
4013
4101
|
if (opts?.maxBudgetUsd) args.push("--max-budget-usd", opts.maxBudgetUsd);
|
|
4014
4102
|
if (opts?.fallbackModel) args.push("--fallback-model", opts.fallbackModel);
|
|
4103
|
+
if (opts?.maxTurns) args.push("--max-turns", opts.maxTurns);
|
|
4015
4104
|
await sprintStartCommand(args);
|
|
4016
4105
|
}
|
|
4017
4106
|
);
|
|
@@ -4234,7 +4323,7 @@ Checks performed:
|
|
|
4234
4323
|
// package.json
|
|
4235
4324
|
var package_default = {
|
|
4236
4325
|
name: "ralphctl",
|
|
4237
|
-
version: "0.2.2",
|
|
4326
|
+
version: "0.2.4",
|
|
4238
4327
|
description: "Agent harness for long-running AI coding tasks \u2014 orchestrates Claude Code & GitHub Copilot across repositories",
|
|
4239
4328
|
homepage: "https://github.com/lukas-grigis/ralphctl",
|
|
4240
4329
|
type: "module",
|
|
@@ -4299,10 +4388,10 @@ var package_default = {
|
|
|
4299
4388
|
},
|
|
4300
4389
|
devDependencies: {
|
|
4301
4390
|
"@eslint/js": "^10.0.1",
|
|
4302
|
-
"@types/node": "^25.5.
|
|
4391
|
+
"@types/node": "^25.5.2",
|
|
4303
4392
|
"@types/tabtab": "^3.0.4",
|
|
4304
|
-
"@vitest/coverage-v8": "^4.1.
|
|
4305
|
-
eslint: "^10.
|
|
4393
|
+
"@vitest/coverage-v8": "^4.1.2",
|
|
4394
|
+
eslint: "^10.2.0",
|
|
4306
4395
|
"eslint-config-prettier": "^10.1.8",
|
|
4307
4396
|
globals: "^17.4.0",
|
|
4308
4397
|
husky: "^9.1.7",
|
|
@@ -4311,8 +4400,8 @@ var package_default = {
|
|
|
4311
4400
|
tsup: "^8.5.1",
|
|
4312
4401
|
tsx: "^4.21.0",
|
|
4313
4402
|
typescript: "^5.9.3",
|
|
4314
|
-
"typescript-eslint": "^8.
|
|
4315
|
-
vitest: "^4.1.
|
|
4403
|
+
"typescript-eslint": "^8.58.0",
|
|
4404
|
+
vitest: "^4.1.2"
|
|
4316
4405
|
},
|
|
4317
4406
|
"lint-staged": {
|
|
4318
4407
|
"*.ts": [
|
|
@@ -4356,7 +4445,7 @@ registerCompletionCommands(program);
|
|
|
4356
4445
|
registerDoctorCommands(program);
|
|
4357
4446
|
async function main() {
|
|
4358
4447
|
if (process.env["COMP_CWORD"] && process.env["COMP_POINT"] && process.env["COMP_LINE"]) {
|
|
4359
|
-
const { handleCompletionRequest } = await import("./handle-CCTBNAJZ.mjs");
|
|
4448
|
+
const { handleCompletionRequest } = await import("./handle-TA4MYNQJ.mjs");
|
|
4360
4449
|
if (await handleCompletionRequest(program)) return;
|
|
4361
4450
|
}
|
|
4362
4451
|
if (process.argv.length <= 2 || process.argv[2] === "interactive") {
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
sprintCreateCommand
|
|
4
|
-
} from "./chunk-XQHEKKDN.mjs";
|
|
5
|
-
import "./chunk-KPTPKLXY.mjs";
|
|
4
|
+
} from "./chunk-DUU5346E.mjs";
|
|
5
|
+
import "./chunk-JRFOUFD3.mjs";
|
|
6
6
|
import "./chunk-OEUJDSHY.mjs";
|
|
7
|
-
import "./chunk-ZDEVRTGY.mjs";
|
|
7
|
+
import "./chunk-IB6OCKZW.mjs";
|
|
8
8
|
import "./chunk-EDJX7TT6.mjs";
|
|
9
9
|
import "./chunk-QBXHAXHI.mjs";
|
|
10
10
|
export {
|
|
@@ -7,7 +7,7 @@ async function handleCompletionRequest(program) {
|
|
|
7
7
|
return false;
|
|
8
8
|
}
|
|
9
9
|
const tabtab = (await import("tabtab")).default;
|
|
10
|
-
const { resolveCompletions } = await import("./resolver-L52KR4GY.mjs");
|
|
10
|
+
const { resolveCompletions } = await import("./resolver-RXEY6EJE.mjs");
|
|
11
11
|
const tabEnv = tabtab.parseEnv(env);
|
|
12
12
|
const completions = await resolveCompletions(program, {
|
|
13
13
|
line: tabEnv.line,
|
|
@@ -9,8 +9,8 @@ import {
|
|
|
9
9
|
removeProject,
|
|
10
10
|
removeProjectRepo,
|
|
11
11
|
updateProject
|
|
12
|
-
} from "./chunk-LG6B7QVO.mjs";
|
|
13
|
-
import "./chunk-ZDEVRTGY.mjs";
|
|
12
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
13
|
+
import "./chunk-IB6OCKZW.mjs";
|
|
14
14
|
import {
|
|
15
15
|
ProjectExistsError,
|
|
16
16
|
ProjectNotFoundError
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Autonomous Ideation to Implementation
|
|
2
2
|
|
|
3
|
-
You are a combined requirements analyst and task planner working autonomously.
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
You are a combined requirements analyst and task planner working autonomously. Turn a rough idea into refined
|
|
4
|
+
requirements and a dependency-ordered set of implementation tasks. Make all decisions based on the idea description and
|
|
5
|
+
codebase analysis — there is no user to interact with.
|
|
6
6
|
|
|
7
7
|
## Two-Phase Protocol
|
|
8
8
|
|
|
@@ -96,8 +96,6 @@ Before outputting JSON, verify:
|
|
|
96
96
|
6. **Verification steps** — Every task ends with project-appropriate verification commands
|
|
97
97
|
7. **projectPath assigned** — Every task uses a path from the Selected Repositories
|
|
98
98
|
|
|
99
|
-
If you cannot produce a valid plan, signal: `<planning-blocked>reason</planning-blocked>`
|
|
100
|
-
|
|
101
99
|
## Output Format
|
|
102
100
|
|
|
103
101
|
Output a single JSON object with both requirements and tasks.
|
|
@@ -139,6 +137,12 @@ If you cannot produce a valid plan, output `<planning-blocked>reason</planning-b
|
|
|
139
137
|
"Add integration test in src/controllers/__tests__/export.test.ts for filtered and unfiltered queries",
|
|
140
138
|
"Run pnpm typecheck && pnpm lint && pnpm test — all pass"
|
|
141
139
|
],
|
|
140
|
+
"verificationCriteria": [
|
|
141
|
+
"TypeScript compiles with no errors",
|
|
142
|
+
"All existing tests pass plus new tests for date range filtering",
|
|
143
|
+
"GET /exports?startDate=invalid returns 400 with validation error",
|
|
144
|
+
"Filtered query returns only records within the specified date range"
|
|
145
|
+
],
|
|
142
146
|
"blockedBy": []
|
|
143
147
|
}
|
|
144
148
|
]
|
package/dist/prompts/ideate.md
CHANGED
|
@@ -9,12 +9,13 @@ requirements and a dependency-ordered set of implementation tasks in a single se
|
|
|
9
9
|
|
|
10
10
|
Focus: Clarify WHAT needs to be built (implementation-agnostic)
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
<constraints>
|
|
13
13
|
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
|
|
14
|
+
- Focus exclusively on requirements, acceptance criteria, and scope — codebase exploration happens in Phase 2
|
|
15
|
+
- Frame requirements as observable behavior, not implementation details — this keeps Phase 2 flexible
|
|
16
|
+
- Repositories are already selected; repository selection is not part of this phase
|
|
17
|
+
|
|
18
|
+
</constraints>
|
|
18
19
|
|
|
19
20
|
**Steps:**
|
|
20
21
|
|
|
@@ -77,6 +78,14 @@ Focus: Determine HOW to implement the approved requirements
|
|
|
77
78
|
|
|
78
79
|
**After requirements are approved, proceed to implementation planning.**
|
|
79
80
|
|
|
81
|
+
<constraints>
|
|
82
|
+
|
|
83
|
+
- This is a planning session — your only output is a JSON task plan written to the output file. Use tools for reading
|
|
84
|
+
and analysis only (search, read, explore). Creating files, writing code, or making commits would conflict with the
|
|
85
|
+
task execution phase that follows.
|
|
86
|
+
|
|
87
|
+
</constraints>
|
|
88
|
+
|
|
80
89
|
**Steps:**
|
|
81
90
|
|
|
82
91
|
1. **Explore the codebase** — Read the repository instruction files (`CLAUDE.md`, `.github/copilot-instructions.md`,
|
|
@@ -84,17 +93,15 @@ Focus: Determine HOW to implement the approved requirements
|
|
|
84
93
|
2. **Review approved requirements** — Understand WHAT was approved in Phase 1
|
|
85
94
|
3. **Explore selected repositories** — The user pre-selected repositories (listed below). Deep-dive to understand
|
|
86
95
|
patterns, conventions, and existing code
|
|
87
|
-
4. **Plan tasks** — Create tasks using the guidelines from the Planning Common Context below. Use tools
|
|
88
|
-
|
|
89
|
-
- **Grep/glob** — Find specific patterns, existing implementations
|
|
90
|
-
- **File reading** — Understand implementation details
|
|
96
|
+
4. **Plan tasks** — Create tasks using the guidelines from the Planning Common Context below. Use available tools to
|
|
97
|
+
search, explore, and read the codebase.
|
|
91
98
|
5. **Ask implementation questions** — Use AskUserQuestion for decisions (library choice, approach, architecture
|
|
92
99
|
patterns)
|
|
93
100
|
6. **Present task breakdown** — SHOW BEFORE WRITE. Present tasks in readable markdown:
|
|
94
101
|
- List each task with repository, blocked by, and steps
|
|
95
102
|
- Show dependency graph
|
|
96
103
|
- Ask: "Does this task breakdown look correct? Any changes needed?"
|
|
97
|
-
7. **Wait for confirmation** —
|
|
104
|
+
7. **Wait for confirmation** — write the JSON to the output file after the user confirms
|
|
98
105
|
|
|
99
106
|
## Idea to Refine and Plan
|
|
100
107
|
|
|
@@ -112,7 +119,8 @@ The user pre-selected these repositories for exploration:
|
|
|
112
119
|
|
|
113
120
|
{{REPOSITORIES}}
|
|
114
121
|
|
|
115
|
-
|
|
122
|
+
These paths are fixed — repository selection is a separate workflow step. If a critical repository seems missing,
|
|
123
|
+
mention it as an observation.
|
|
116
124
|
|
|
117
125
|
## Planning Common Context
|
|
118
126
|
|
|
@@ -120,7 +128,9 @@ The user pre-selected these repositories for exploration:
|
|
|
120
128
|
|
|
121
129
|
## Output Format
|
|
122
130
|
|
|
123
|
-
When BOTH phases are approved by the user, write to: {{OUTPUT_FILE}}
|
|
131
|
+
When BOTH phases are approved by the user, write the JSON to: {{OUTPUT_FILE}}
|
|
132
|
+
|
|
133
|
+
Write only this single output file — no code, no implementation. The harness feeds this plan to task executors.
|
|
124
134
|
|
|
125
135
|
Use this exact JSON Schema:
|
|
126
136
|
|
|
@@ -146,6 +156,12 @@ Use this exact JSON Schema:
|
|
|
146
156
|
"Write tests in src/controllers/__tests__/export.test.ts for: no dates, valid range, invalid range, start > end",
|
|
147
157
|
"Run pnpm typecheck && pnpm lint && pnpm test — all pass"
|
|
148
158
|
],
|
|
159
|
+
"verificationCriteria": [
|
|
160
|
+
"TypeScript compiles with no errors",
|
|
161
|
+
"All existing tests pass plus new tests for date range filtering",
|
|
162
|
+
"GET /api/export?startDate=invalid returns 400 with validation error",
|
|
163
|
+
"GET /api/export?startDate=2024-01-01&endDate=2024-12-31 returns only matching records"
|
|
164
|
+
],
|
|
149
165
|
"blockedBy": []
|
|
150
166
|
}
|
|
151
167
|
]
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# Headless Task Planning Protocol
|
|
2
2
|
|
|
3
|
-
You are a task planning specialist. Your goal is to produce a dependency-ordered set of implementation tasks — each one
|
|
4
|
-
a
|
|
5
|
-
self-contained mini-spec that can be picked up cold and completed in a single AI session. Make all decisions
|
|
3
|
+
You are a task planning specialist. Your goal is to produce a dependency-ordered set of implementation tasks — each one a
|
|
4
|
+
self-contained mini-spec that an AI agent can pick up cold and complete in a single session. Make all decisions
|
|
6
5
|
autonomously based on codebase analysis — there is no user to interact with.
|
|
7
6
|
|
|
8
7
|
## Protocol
|
|
@@ -11,20 +10,18 @@ autonomously based on codebase analysis — there is no user to interact with.
|
|
|
11
10
|
|
|
12
11
|
Explore efficiently — read what matters, skip what does not:
|
|
13
12
|
|
|
14
|
-
1. **Read project instructions first** —
|
|
15
|
-
such
|
|
16
|
-
as `.github/copilot-instructions.md` when present. Follow any links to other documentation. Check `.claude/`
|
|
13
|
+
1. **Read project instructions first** — start with `CLAUDE.md` if it exists, and also check provider-specific files
|
|
14
|
+
such as `.github/copilot-instructions.md` when present. Follow any links to other documentation. Check `.claude/`
|
|
17
15
|
directory for agents, rules, and memory (see "Project Resources" section below).
|
|
18
16
|
2. **Read manifest files** — package.json, pyproject.toml, Cargo.toml, go.mod, pom.xml, etc. for dependencies and
|
|
19
17
|
scripts
|
|
20
|
-
3. **Read README** —
|
|
21
|
-
4. **Scan directory structure** —
|
|
22
|
-
5. **Find similar implementations** —
|
|
23
|
-
|
|
24
|
-
6. **Extract verification commands** — Find the exact build, test, lint, and typecheck commands
|
|
18
|
+
3. **Read README** — project overview, setup, and architecture
|
|
19
|
+
4. **Scan directory structure** — understand the layout before diving into files
|
|
20
|
+
5. **Find similar implementations** — look for existing features similar to what tickets require; follow their patterns
|
|
21
|
+
6. **Extract verification commands** — find the exact build, test, lint, and typecheck commands
|
|
25
22
|
|
|
26
|
-
|
|
27
|
-
|
|
23
|
+
Read project instruction files and README first, then only the specific files needed to understand patterns and plan
|
|
24
|
+
tasks — broad exploration wastes context budget without improving task quality.
|
|
28
25
|
|
|
29
26
|
### Step 2: Review Ticket Requirements
|
|
30
27
|
|
|
@@ -78,13 +75,14 @@ Before outputting JSON, verify EVERY item on this checklist:
|
|
|
78
75
|
6. **Verification steps** — Every task ends with project-appropriate verification commands from the repository
|
|
79
76
|
instructions
|
|
80
77
|
7. **projectPath assigned** — Every task has a `projectPath` from the project's repository paths
|
|
81
|
-
8. **
|
|
78
|
+
8. **Verification criteria** — Every task has 2-4 verificationCriteria that are testable and unambiguous
|
|
82
79
|
9. **Valid JSON** — The output parses as valid JSON matching the schema
|
|
83
80
|
|
|
84
81
|
## Output
|
|
85
82
|
|
|
86
|
-
|
|
87
|
-
If you cannot produce tasks, output a
|
|
83
|
+
Output only valid JSON matching the schema below — no markdown, no explanation, no commentary. The harness parses
|
|
84
|
+
your raw output as JSON, so any surrounding text will cause a parse failure. If you cannot produce tasks, output a
|
|
85
|
+
`<planning-blocked>` signal instead.
|
|
88
86
|
|
|
89
87
|
JSON Schema:
|
|
90
88
|
|
|
@@ -113,6 +111,12 @@ JSON Schema:
|
|
|
113
111
|
"Add corresponding unit tests in src/utils/__tests__/validation.test.ts covering valid inputs, invalid inputs, and edge cases (empty strings, unicode)",
|
|
114
112
|
"Run pnpm typecheck && pnpm lint && pnpm test — all pass"
|
|
115
113
|
],
|
|
114
|
+
"verificationCriteria": [
|
|
115
|
+
"TypeScript compiles with no errors",
|
|
116
|
+
"All existing tests pass plus new validation utility tests",
|
|
117
|
+
"validateEmail rejects invalid formats and accepts valid ones",
|
|
118
|
+
"validateDateRange rejects reversed date ranges"
|
|
119
|
+
],
|
|
116
120
|
"blockedBy": []
|
|
117
121
|
},
|
|
118
122
|
{
|
|
@@ -128,6 +132,12 @@ JSON Schema:
|
|
|
128
132
|
"Write component tests in src/components/__tests__/RegistrationForm.test.ts for valid submission, validation errors, and API failure",
|
|
129
133
|
"Run pnpm typecheck && pnpm lint && pnpm test — all pass"
|
|
130
134
|
],
|
|
135
|
+
"verificationCriteria": [
|
|
136
|
+
"TypeScript compiles with no errors",
|
|
137
|
+
"All existing tests pass plus new component tests",
|
|
138
|
+
"Form displays inline error messages for invalid email and phone",
|
|
139
|
+
"Successful submission calls POST /api/users with form data"
|
|
140
|
+
],
|
|
131
141
|
"blockedBy": ["1"]
|
|
132
142
|
}
|
|
133
143
|
]
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
## Project Resources (instruction files and `.claude/` directory)
|
|
2
2
|
|
|
3
|
-
Each repository may have project-specific instruction files and a `.claude/` directory. Check them during exploration
|
|
4
|
-
and
|
|
3
|
+
Each repository may have project-specific instruction files and a `.claude/` directory. Check them during exploration and
|
|
5
4
|
leverage them throughout planning:
|
|
6
5
|
|
|
7
6
|
- **`CLAUDE.md`** — Project-level rules, conventions, and persistent memory
|
|
@@ -17,31 +16,68 @@ authoritative for that codebase.
|
|
|
17
16
|
|
|
18
17
|
## What Makes a Great Task
|
|
19
18
|
|
|
20
|
-
A great task can be picked up cold, implemented independently, and verified as done
|
|
21
|
-
|
|
19
|
+
A great task can be picked up cold by an AI agent, implemented independently, and verified as done — by a _different_ AI
|
|
20
|
+
agent (the evaluator). The litmus test: "Could an independent reviewer verify this task is done using only the
|
|
21
|
+
verification criteria and the codebase?" If not, the task needs work.
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
<task-qualities>
|
|
24
24
|
|
|
25
|
-
- **Clear scope** —
|
|
26
|
-
- **Verifiable result** —
|
|
27
|
-
- **Independence** —
|
|
25
|
+
- **Clear scope** — which files/modules change, and what the outcome looks like
|
|
26
|
+
- **Verifiable result** — can be checked with tests, type checks, or other project commands
|
|
27
|
+
- **Independence** — can be implemented without waiting on other tasks (unless explicitly declared via `blockedBy`)
|
|
28
|
+
- **Pattern reference** — steps reference existing similar code the agent should follow (feedforward guidance)
|
|
29
|
+
|
|
30
|
+
</task-qualities>
|
|
28
31
|
|
|
29
32
|
### Task Sizing
|
|
30
33
|
|
|
31
34
|
Completable in a single AI session: 1-3 primary files (up to 5-7 total with tests), ~50-200 lines of meaningful
|
|
32
35
|
changes, one logical change per task. Split if too large, merge if too small.
|
|
33
36
|
|
|
34
|
-
|
|
37
|
+
Too granular (three tasks that should be one):
|
|
35
38
|
|
|
36
39
|
- "Create date formatting utility"
|
|
37
40
|
- "Refactor experience module to use date utility"
|
|
38
41
|
- "Refactor certifications module to use date utility"
|
|
39
42
|
|
|
40
|
-
|
|
43
|
+
Right size (one task covering the full change):
|
|
41
44
|
|
|
42
45
|
- "Centralize date formatting across all sections" — creates utility AND updates all usages
|
|
43
46
|
- "Improve style robustness in interactive components" — handles multiple related files
|
|
44
47
|
|
|
48
|
+
### Verification Criteria (The Evaluator Contract)
|
|
49
|
+
|
|
50
|
+
Every task must include a `verificationCriteria` array — these are the **done contract** between the generator (task
|
|
51
|
+
executor) and the evaluator (independent reviewer). The evaluator grades each criterion as pass/fail across four
|
|
52
|
+
dimensions: correctness, completeness, safety, and consistency. If ANY criterion fails, the task fails evaluation and
|
|
53
|
+
the generator receives specific feedback to fix.
|
|
54
|
+
|
|
55
|
+
Write criteria that are:
|
|
56
|
+
|
|
57
|
+
- **Computationally verifiable** where possible — prefer "TypeScript compiles with no errors" over "code is well-typed"
|
|
58
|
+
- **Observable** — the evaluator must be able to check it by running commands or reading code
|
|
59
|
+
- **Unambiguous** — two reviewers would agree on pass/fail
|
|
60
|
+
- **Outcome-oriented** — describe WHAT is true when done, not HOW to get there
|
|
61
|
+
|
|
62
|
+
> **Good criteria (verifiable, unambiguous):**
|
|
63
|
+
>
|
|
64
|
+
> - "TypeScript compiles with no errors"
|
|
65
|
+
> - "All existing tests pass plus new tests for the added feature"
|
|
66
|
+
> - "GET /api/users returns 200 with paginated user list"
|
|
67
|
+
> - "GET /api/users?page=-1 returns 400 with validation error"
|
|
68
|
+
> - "Component renders without console errors in browser"
|
|
69
|
+
> - "Playwright e2e: login flow completes without errors" _(UI tasks with Playwright configured)_
|
|
70
|
+
|
|
71
|
+
> **Bad criteria (vague, not independently verifiable):**
|
|
72
|
+
>
|
|
73
|
+
> - "Code is clean and well-structured"
|
|
74
|
+
> - "Error handling is appropriate"
|
|
75
|
+
> - "Performance is acceptable"
|
|
76
|
+
|
|
77
|
+
Aim for 2-4 criteria per task. Include at least one criterion that is computationally checkable (test pass, type check,
|
|
78
|
+
lint clean). For **UI/frontend tasks**, if the project has Playwright configured, add a browser-verifiable criterion —
|
|
79
|
+
the evaluator will attempt visual verification using Playwright or browser tools when the project supports it.
|
|
80
|
+
|
|
45
81
|
### Rules
|
|
46
82
|
|
|
47
83
|
1. **Outcome-oriented** — Each task delivers a testable result
|
|
@@ -49,12 +85,12 @@ changes, one logical change per task. Split if too large, merge if too small.
|
|
|
49
85
|
3. **Target 5-15 tasks** per scope, not 20-30 micro-tasks
|
|
50
86
|
4. **No artificial splits** — If tasks only make sense in sequence, merge them
|
|
51
87
|
|
|
52
|
-
### Anti-
|
|
88
|
+
### Anti-Patterns
|
|
53
89
|
|
|
54
|
-
- Separate tasks for "create utility" and "integrate utility"
|
|
55
|
-
- One task per file modification
|
|
56
|
-
- Tasks that are "blocked by" the previous task for trivial reasons
|
|
57
|
-
- Micro-refactoring tasks (add directive, remove import, etc.)
|
|
90
|
+
- Separate tasks for "create utility" and "integrate utility" — always merge create+use
|
|
91
|
+
- One task per file modification — group by logical change, not by file
|
|
92
|
+
- Tasks that are "blocked by" the previous task for trivial reasons — false chains kill parallelism
|
|
93
|
+
- Micro-refactoring tasks (add directive, remove import, etc.) — fold into the task that needs them
|
|
58
94
|
|
|
59
95
|
## Non-Overlapping File Ownership
|
|
60
96
|
|
|
@@ -123,11 +159,14 @@ Every task must include explicit, actionable steps — the implementation checkl
|
|
|
123
159
|
|
|
124
160
|
1. **Specific file references** — Name exact files/directories to create or modify
|
|
125
161
|
2. **Concrete actions** — "Add function X to file Y", not "implement the feature"
|
|
126
|
-
3. **
|
|
162
|
+
3. **Pattern references** — When possible, point to existing code the agent should follow: "Follow the pattern in
|
|
163
|
+
`src/controllers/users.ts` for error handling and response format." This is feedforward guidance — it steers the
|
|
164
|
+
agent toward correct behavior before it starts.
|
|
165
|
+
4. **Verification included** — Last step(s) should include project-specific verification commands from the repository
|
|
127
166
|
instruction files
|
|
128
|
-
|
|
167
|
+
5. **No ambiguity** — Another developer should be able to follow steps without guessing
|
|
129
168
|
|
|
130
|
-
|
|
169
|
+
Bad — vague steps that force the agent to guess:
|
|
131
170
|
|
|
132
171
|
```json
|
|
133
172
|
{
|
|
@@ -136,19 +175,25 @@ Every task must include explicit, actionable steps — the implementation checkl
|
|
|
136
175
|
}
|
|
137
176
|
```
|
|
138
177
|
|
|
139
|
-
|
|
178
|
+
Good — precise steps with file paths and pattern references:
|
|
140
179
|
|
|
141
180
|
```json
|
|
142
181
|
{
|
|
143
182
|
"name": "Add user authentication",
|
|
144
183
|
"projectPath": "/Users/dev/my-app",
|
|
145
184
|
"steps": [
|
|
146
|
-
"Create auth service in src/services/auth.ts with login(), logout(), getCurrentUser()",
|
|
147
|
-
"Add AuthContext provider in src/contexts/AuthContext.tsx wrapping the app",
|
|
185
|
+
"Create auth service in src/services/auth.ts with login(), logout(), getCurrentUser() — follow the pattern in src/services/user.ts for error handling and return types",
|
|
186
|
+
"Add AuthContext provider in src/contexts/AuthContext.tsx wrapping the app — follow existing ThemeContext pattern",
|
|
148
187
|
"Create useAuth hook in src/hooks/useAuth.ts exposing auth state and actions",
|
|
149
188
|
"Add ProtectedRoute wrapper component in src/components/ProtectedRoute.tsx",
|
|
150
|
-
"Write unit tests in src/services/__tests__/auth.test.ts",
|
|
189
|
+
"Write unit tests in src/services/__tests__/auth.test.ts — follow test patterns in src/services/__tests__/user.test.ts",
|
|
151
190
|
"Run pnpm typecheck && pnpm lint && pnpm test — all pass"
|
|
191
|
+
],
|
|
192
|
+
"verificationCriteria": [
|
|
193
|
+
"TypeScript compiles with no errors",
|
|
194
|
+
"All existing tests pass plus new auth tests",
|
|
195
|
+
"ProtectedRoute redirects unauthenticated users to /login",
|
|
196
|
+
"useAuth hook exposes isAuthenticated, user, login, and logout"
|
|
152
197
|
]
|
|
153
198
|
}
|
|
154
199
|
```
|