ralphctl 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -9
- package/dist/{add-K7LNOYQ4.mjs → add-3T225IX5.mjs} +3 -3
- package/dist/{add-DWNLZQ7Q.mjs → add-6A5432U2.mjs} +4 -4
- package/dist/{chunk-QYF7QIZJ.mjs → chunk-742XQ7FL.mjs} +3 -3
- package/dist/{chunk-ORVGM6EV.mjs → chunk-CSICORGV.mjs} +583 -204
- package/dist/{chunk-V4ZUDZCG.mjs → chunk-DUU5346E.mjs} +1 -1
- package/dist/{chunk-7TBO6GOT.mjs → chunk-EUNAUHC3.mjs} +1 -1
- package/dist/{chunk-GLDPHKEW.mjs → chunk-IB6OCKZW.mjs} +15 -2
- package/dist/{chunk-ITRZMBLJ.mjs → chunk-JRFOUFD3.mjs} +1 -1
- package/dist/{chunk-LAERLCL5.mjs → chunk-UBPZHHCD.mjs} +2 -2
- package/dist/cli.mjs +29 -12
- package/dist/{create-5MILNF7E.mjs → create-MYGOWO2F.mjs} +3 -3
- package/dist/{handle-2BACSJLR.mjs → handle-TA4MYNQJ.mjs} +1 -1
- package/dist/{project-XC7AXA4B.mjs → project-YONEJICR.mjs} +2 -2
- package/dist/prompts/harness-context.md +5 -0
- package/dist/prompts/ideate-auto.md +34 -17
- package/dist/prompts/ideate.md +18 -2
- package/dist/prompts/plan-auto.md +7 -12
- package/dist/prompts/plan-common.md +18 -2
- package/dist/prompts/plan-interactive.md +8 -13
- package/dist/prompts/signals-evaluation.md +6 -0
- package/dist/prompts/signals-planning.md +5 -0
- package/dist/prompts/signals-task.md +7 -0
- package/dist/prompts/task-evaluation-resume.md +34 -0
- package/dist/prompts/task-evaluation.md +8 -0
- package/dist/prompts/task-execution.md +10 -19
- package/dist/prompts/validation-checklist.md +14 -0
- package/dist/{resolver-CFY6DIOP.mjs → resolver-RXEY6EJE.mjs} +2 -2
- package/dist/{sprint-F4VRAEWZ.mjs → sprint-FGLWYWKX.mjs} +2 -2
- package/dist/{wizard-RCQ4QQOL.mjs → wizard-XZ7OGBCJ.mjs} +6 -6
- package/package.json +1 -1
- package/schemas/tasks.schema.json +10 -1
|
@@ -53,6 +53,13 @@ function getTasksFilePath(sprintId) {
|
|
|
53
53
|
function getProgressFilePath(sprintId) {
|
|
54
54
|
return join(getSprintDir(sprintId), "progress.md");
|
|
55
55
|
}
|
|
56
|
+
function getEvaluationsDir(sprintId) {
|
|
57
|
+
return join(getSprintDir(sprintId), "evaluations");
|
|
58
|
+
}
|
|
59
|
+
function getEvaluationFilePath(sprintId, taskId) {
|
|
60
|
+
assertSafeSegment(taskId, "task ID");
|
|
61
|
+
return join(getEvaluationsDir(sprintId), `${taskId}.md`);
|
|
62
|
+
}
|
|
56
63
|
function assertSafeSegment(segment, label) {
|
|
57
64
|
if (!segment || segment.includes("/") || segment.includes("\\") || segment.includes("..") || segment.includes("\0")) {
|
|
58
65
|
throw new Error(`Path traversal detected in ${label}: ${segment}`);
|
|
@@ -203,6 +210,7 @@ import { z } from "zod";
|
|
|
203
210
|
var SprintStatusSchema = z.enum(["draft", "active", "closed"]);
|
|
204
211
|
var TaskStatusSchema = z.enum(["todo", "in_progress", "done"]);
|
|
205
212
|
var RequirementStatusSchema = z.enum(["pending", "approved"]);
|
|
213
|
+
var EvaluationStatusSchema = z.enum(["passed", "failed", "malformed"]);
|
|
206
214
|
var RepositorySchema = z.object({
|
|
207
215
|
name: z.string().min(1),
|
|
208
216
|
// Auto-derived from basename(path)
|
|
@@ -254,8 +262,12 @@ var TaskSchema = z.object({
|
|
|
254
262
|
// Output from verification run
|
|
255
263
|
evaluated: z.boolean().default(false),
|
|
256
264
|
// Whether evaluation passed
|
|
257
|
-
evaluationOutput: z.string().optional()
|
|
258
|
-
//
|
|
265
|
+
evaluationOutput: z.string().optional(),
|
|
266
|
+
// Truncated output from evaluation run (full critique lives in evaluationFile)
|
|
267
|
+
evaluationStatus: EvaluationStatusSchema.optional(),
|
|
268
|
+
// Discriminator: 'passed' | 'failed' | 'malformed'
|
|
269
|
+
evaluationFile: z.string().optional()
|
|
270
|
+
// Sidecar file path containing the full untruncated critique
|
|
259
271
|
});
|
|
260
272
|
var TasksSchema = z.array(TaskSchema);
|
|
261
273
|
var ImportTaskSchema = z.object({
|
|
@@ -309,6 +321,7 @@ export {
|
|
|
309
321
|
getSprintFilePath,
|
|
310
322
|
getTasksFilePath,
|
|
311
323
|
getProgressFilePath,
|
|
324
|
+
getEvaluationFilePath,
|
|
312
325
|
getRefinementDir,
|
|
313
326
|
getPlanningDir,
|
|
314
327
|
getIdeateDir,
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "./chunk-7TG3EAQ2.mjs";
|
|
9
9
|
import {
|
|
10
10
|
createProject
|
|
11
|
-
} from "./chunk-7TBO6GOT.mjs";
|
|
11
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
12
12
|
import {
|
|
13
13
|
ensureError,
|
|
14
14
|
wrapAsync
|
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
import {
|
|
17
17
|
expandTilde,
|
|
18
18
|
validateProjectPath
|
|
19
|
-
} from "./chunk-GLDPHKEW.mjs";
|
|
19
|
+
} from "./chunk-IB6OCKZW.mjs";
|
|
20
20
|
import {
|
|
21
21
|
IOError,
|
|
22
22
|
ProjectExistsError
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
addCheckScriptToRepository,
|
|
4
4
|
projectAddCommand
|
|
5
|
-
} from "./chunk-LAERLCL5.mjs";
|
|
5
|
+
} from "./chunk-UBPZHHCD.mjs";
|
|
6
6
|
import {
|
|
7
7
|
addTask,
|
|
8
8
|
areAllTasksDone,
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
buildHeadlessAiRequest,
|
|
11
11
|
buildIdeateAutoPrompt,
|
|
12
12
|
buildIdeatePrompt,
|
|
13
|
+
buildProjectToolingSection,
|
|
13
14
|
buildTicketRefinePrompt,
|
|
14
15
|
exportRequirementsToMarkdown,
|
|
15
16
|
extractJsonArray,
|
|
@@ -52,13 +53,13 @@ import {
|
|
|
52
53
|
sprintStartCommand,
|
|
53
54
|
updateTaskStatus,
|
|
54
55
|
validateImportTasks
|
|
55
|
-
} from "./chunk-ORVGM6EV.mjs";
|
|
56
|
+
} from "./chunk-CSICORGV.mjs";
|
|
56
57
|
import {
|
|
57
58
|
escapableSelect
|
|
58
59
|
} from "./chunk-7LZ6GOGN.mjs";
|
|
59
60
|
import {
|
|
60
61
|
sprintCreateCommand
|
|
61
|
-
} from "./chunk-V4ZUDZCG.mjs";
|
|
62
|
+
} from "./chunk-DUU5346E.mjs";
|
|
62
63
|
import {
|
|
63
64
|
addTicket,
|
|
64
65
|
allRequirementsApproved,
|
|
@@ -73,7 +74,7 @@ import {
|
|
|
73
74
|
removeTicket,
|
|
74
75
|
ticketAddCommand,
|
|
75
76
|
updateTicket
|
|
76
|
-
} from "./chunk-QYF7QIZJ.mjs";
|
|
77
|
+
} from "./chunk-742XQ7FL.mjs";
|
|
77
78
|
import {
|
|
78
79
|
EXIT_ERROR,
|
|
79
80
|
exitWithCode
|
|
@@ -84,7 +85,7 @@ import {
|
|
|
84
85
|
listProjects,
|
|
85
86
|
removeProject,
|
|
86
87
|
removeProjectRepo
|
|
87
|
-
} from "./chunk-7TBO6GOT.mjs";
|
|
88
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
88
89
|
import {
|
|
89
90
|
DEFAULT_EVALUATION_ITERATIONS,
|
|
90
91
|
assertSprintStatus,
|
|
@@ -107,7 +108,7 @@ import {
|
|
|
107
108
|
setEditor,
|
|
108
109
|
setEvaluationIterations,
|
|
109
110
|
withFileLock
|
|
110
|
-
} from "./chunk-ITRZMBLJ.mjs";
|
|
111
|
+
} from "./chunk-JRFOUFD3.mjs";
|
|
111
112
|
import {
|
|
112
113
|
ensureError,
|
|
113
114
|
wrapAsync
|
|
@@ -134,7 +135,7 @@ import {
|
|
|
134
135
|
getTasksFilePath,
|
|
135
136
|
readValidatedJson,
|
|
136
137
|
validateProjectPath
|
|
137
|
-
} from "./chunk-GLDPHKEW.mjs";
|
|
138
|
+
} from "./chunk-IB6OCKZW.mjs";
|
|
138
139
|
import {
|
|
139
140
|
DomainError,
|
|
140
141
|
NoCurrentSprintError,
|
|
@@ -1338,8 +1339,16 @@ async function sprintIdeateCommand(args) {
|
|
|
1338
1339
|
const schema = await getTaskImportSchema();
|
|
1339
1340
|
const ideateDir = getIdeateDir(id, ticket.id);
|
|
1340
1341
|
await mkdir(ideateDir, { recursive: true });
|
|
1342
|
+
const projectToolingSection = buildProjectToolingSection(selectedPaths);
|
|
1341
1343
|
if (options.auto) {
|
|
1342
|
-
const prompt = buildIdeateAutoPrompt(
|
|
1344
|
+
const prompt = buildIdeateAutoPrompt(
|
|
1345
|
+
ideaTitle,
|
|
1346
|
+
ideaDescription,
|
|
1347
|
+
projectName,
|
|
1348
|
+
repositoriesText,
|
|
1349
|
+
schema,
|
|
1350
|
+
projectToolingSection
|
|
1351
|
+
);
|
|
1343
1352
|
const spinner = createSpinner(`${providerName} is refining idea and planning tasks...`);
|
|
1344
1353
|
spinner.start();
|
|
1345
1354
|
const outputR = await wrapAsync(() => invokeAiAuto(prompt, selectedPaths, ideateDir), ensureError);
|
|
@@ -1419,7 +1428,15 @@ async function sprintIdeateCommand(args) {
|
|
|
1419
1428
|
log.newline();
|
|
1420
1429
|
} else {
|
|
1421
1430
|
const outputFile = join(ideateDir, "output.json");
|
|
1422
|
-
const prompt = buildIdeatePrompt(
|
|
1431
|
+
const prompt = buildIdeatePrompt(
|
|
1432
|
+
ideaTitle,
|
|
1433
|
+
ideaDescription,
|
|
1434
|
+
projectName,
|
|
1435
|
+
repositoriesText,
|
|
1436
|
+
outputFile,
|
|
1437
|
+
schema,
|
|
1438
|
+
projectToolingSection
|
|
1439
|
+
);
|
|
1423
1440
|
showInfo(`Starting interactive ${providerName} session...`);
|
|
1424
1441
|
console.log(muted(` Exploring: ${selectedPaths.join(", ")}`));
|
|
1425
1442
|
console.log(muted(`
|
|
@@ -3764,7 +3781,7 @@ async function interactiveMode() {
|
|
|
3764
3781
|
continue;
|
|
3765
3782
|
}
|
|
3766
3783
|
if (command === "wizard") {
|
|
3767
|
-
const { runWizard } = await import("./wizard-RCQ4QQOL.mjs");
|
|
3784
|
+
const { runWizard } = await import("./wizard-XZ7OGBCJ.mjs");
|
|
3768
3785
|
await runWizard();
|
|
3769
3786
|
continue;
|
|
3770
3787
|
}
|
|
@@ -4323,7 +4340,7 @@ Checks performed:
|
|
|
4323
4340
|
// package.json
|
|
4324
4341
|
var package_default = {
|
|
4325
4342
|
name: "ralphctl",
|
|
4326
|
-
version: "0.2.3",
|
|
4343
|
+
version: "0.2.5",
|
|
4327
4344
|
description: "Agent harness for long-running AI coding tasks \u2014 orchestrates Claude Code & GitHub Copilot across repositories",
|
|
4328
4345
|
homepage: "https://github.com/lukas-grigis/ralphctl",
|
|
4329
4346
|
type: "module",
|
|
@@ -4445,7 +4462,7 @@ registerCompletionCommands(program);
|
|
|
4445
4462
|
registerDoctorCommands(program);
|
|
4446
4463
|
async function main() {
|
|
4447
4464
|
if (process.env["COMP_CWORD"] && process.env["COMP_POINT"] && process.env["COMP_LINE"]) {
|
|
4448
|
-
const { handleCompletionRequest } = await import("./handle-2BACSJLR.mjs");
|
|
4465
|
+
const { handleCompletionRequest } = await import("./handle-TA4MYNQJ.mjs");
|
|
4449
4466
|
if (await handleCompletionRequest(program)) return;
|
|
4450
4467
|
}
|
|
4451
4468
|
if (process.argv.length <= 2 || process.argv[2] === "interactive") {
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
sprintCreateCommand
|
|
4
|
-
} from "./chunk-V4ZUDZCG.mjs";
|
|
5
|
-
import "./chunk-ITRZMBLJ.mjs";
|
|
4
|
+
} from "./chunk-DUU5346E.mjs";
|
|
5
|
+
import "./chunk-JRFOUFD3.mjs";
|
|
6
6
|
import "./chunk-OEUJDSHY.mjs";
|
|
7
|
-
import "./chunk-GLDPHKEW.mjs";
|
|
7
|
+
import "./chunk-IB6OCKZW.mjs";
|
|
8
8
|
import "./chunk-EDJX7TT6.mjs";
|
|
9
9
|
import "./chunk-QBXHAXHI.mjs";
|
|
10
10
|
export {
|
|
@@ -7,7 +7,7 @@ async function handleCompletionRequest(program) {
|
|
|
7
7
|
return false;
|
|
8
8
|
}
|
|
9
9
|
const tabtab = (await import("tabtab")).default;
|
|
10
|
-
const { resolveCompletions } = await import("./resolver-CFY6DIOP.mjs");
|
|
10
|
+
const { resolveCompletions } = await import("./resolver-RXEY6EJE.mjs");
|
|
11
11
|
const tabEnv = tabtab.parseEnv(env);
|
|
12
12
|
const completions = await resolveCompletions(program, {
|
|
13
13
|
line: tabEnv.line,
|
|
@@ -9,8 +9,8 @@ import {
|
|
|
9
9
|
removeProject,
|
|
10
10
|
removeProjectRepo,
|
|
11
11
|
updateProject
|
|
12
|
-
} from "./chunk-7TBO6GOT.mjs";
|
|
13
|
-
import "./chunk-GLDPHKEW.mjs";
|
|
12
|
+
} from "./chunk-EUNAUHC3.mjs";
|
|
13
|
+
import "./chunk-IB6OCKZW.mjs";
|
|
14
14
|
import {
|
|
15
15
|
ProjectExistsError,
|
|
16
16
|
ProjectNotFoundError
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
<harness-context>
|
|
2
|
+
Your context window will be automatically compacted as it approaches its limit, allowing you to continue working
|
|
3
|
+
indefinitely. Do not stop early or rush completion due to token budget concerns — the harness manages session
|
|
4
|
+
lifecycle. Focus on doing the work correctly within your designated role.
|
|
5
|
+
</harness-context>
|
|
@@ -4,6 +4,10 @@ You are a combined requirements analyst and task planner working autonomously. T
|
|
|
4
4
|
requirements and a dependency-ordered set of implementation tasks. Make all decisions based on the idea description and
|
|
5
5
|
codebase analysis — there is no user to interact with.
|
|
6
6
|
|
|
7
|
+
{{HARNESS_CONTEXT}}
|
|
8
|
+
|
|
9
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
10
|
+
|
|
7
11
|
## Two-Phase Protocol
|
|
8
12
|
|
|
9
13
|
### Phase 1: Refine Requirements (WHAT)
|
|
@@ -50,13 +54,34 @@ Analyze the idea and produce complete, implementation-agnostic requirements:
|
|
|
50
54
|
|
|
51
55
|
### Phase 2: Plan Implementation (HOW)
|
|
52
56
|
|
|
53
|
-
|
|
57
|
+
Phase 2 begins with reconnaissance — orient yourself in the codebase before generating tasks. Skip exploration and your
|
|
58
|
+
plan will be guesswork.
|
|
59
|
+
|
|
60
|
+
#### Step 0: Explore the Project
|
|
61
|
+
|
|
62
|
+
Explore efficiently — read what matters, skip what does not:
|
|
63
|
+
|
|
64
|
+
1. **Read project instructions first** — start with `CLAUDE.md` if it exists, and also check provider-specific files
|
|
65
|
+
such as `.github/copilot-instructions.md` and `AGENTS.md` when present. Follow any links to other documentation.
|
|
66
|
+
Check the `.claude/` directory for agents, rules, and memory (see "Project Resources" in the Planning Common
|
|
67
|
+
Context below).
|
|
68
|
+
2. **Read manifest files** — `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod`, `pom.xml`, etc. for dependencies
|
|
69
|
+
and scripts
|
|
70
|
+
3. **Read README** — project overview, setup, and architecture
|
|
71
|
+
4. **Scan directory structure** — understand the layout before diving into files
|
|
72
|
+
5. **Find similar implementations** — look for existing features similar to what the requirements call for; follow
|
|
73
|
+
their patterns
|
|
74
|
+
6. **Extract verification commands** — find the exact build, test, lint, and typecheck commands from the repository
|
|
75
|
+
instruction files or project config
|
|
54
76
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
77
|
+
Read project instruction files and README first, then only the specific files needed to understand patterns and plan
|
|
78
|
+
tasks — broad exploration wastes context budget without improving task quality.
|
|
79
|
+
|
|
80
|
+
#### Step 1: Generate the Plan
|
|
81
|
+
|
|
82
|
+
1. **Map requirements to implementation** — Determine which parts of the approved requirements map to which repository
|
|
83
|
+
2. **Create tasks** — Following the Planning Common Context guidelines below
|
|
84
|
+
3. **Validate** — Ensure tasks are non-overlapping, properly ordered, and completable
|
|
60
85
|
|
|
61
86
|
### Blocker Handling
|
|
62
87
|
|
|
@@ -84,17 +109,7 @@ You have access to these repositories:
|
|
|
84
109
|
|
|
85
110
|
{{COMMON}}
|
|
86
111
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
Before outputting JSON, verify:
|
|
90
|
-
|
|
91
|
-
1. **Requirements complete** — Problem statement, acceptance criteria, and scope boundaries are all present
|
|
92
|
-
2. **No file overlap** — No two tasks modify the same files (or overlap is delineated in steps)
|
|
93
|
-
3. **Correct order** — Foundations before dependents, all `blockedBy` references point to earlier tasks
|
|
94
|
-
4. **Maximized parallelism** — Independent tasks do NOT block each other unnecessarily
|
|
95
|
-
5. **Precise steps** — Every task has 3+ specific, actionable steps with file references
|
|
96
|
-
6. **Verification steps** — Every task ends with project-appropriate verification commands
|
|
97
|
-
7. **projectPath assigned** — Every task uses a path from the Selected Repositories
|
|
112
|
+
{{VALIDATION}}
|
|
98
113
|
|
|
99
114
|
## Output Format
|
|
100
115
|
|
|
@@ -149,6 +164,8 @@ If you cannot produce a valid plan, output `<planning-blocked>reason</planning-b
|
|
|
149
164
|
}
|
|
150
165
|
```
|
|
151
166
|
|
|
167
|
+
{{SIGNALS}}
|
|
168
|
+
|
|
152
169
|
---
|
|
153
170
|
|
|
154
171
|
Proceed autonomously: refine the idea into clear requirements, explore the codebase, then generate tasks. Output only
|
package/dist/prompts/ideate.md
CHANGED
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
You are a combined requirements analyst and task planner. Your goal is to quickly turn a rough idea into refined
|
|
4
4
|
requirements and a dependency-ordered set of implementation tasks in a single session.
|
|
5
5
|
|
|
6
|
+
{{HARNESS_CONTEXT}}
|
|
7
|
+
|
|
8
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
9
|
+
|
|
6
10
|
## Two-Phase Protocol
|
|
7
11
|
|
|
8
12
|
### Phase 1: Refine Requirements (WHAT)
|
|
@@ -27,8 +31,16 @@ Focus: Clarify WHAT needs to be built (implementation-agnostic)
|
|
|
27
31
|
- What are the acceptance criteria? (Given/When/Then format)
|
|
28
32
|
- What edge cases and error states need handling?
|
|
29
33
|
- What are the business constraints? (performance, compatibility, etc.)
|
|
30
|
-
3. **Stop when ready** — Stop asking questions when
|
|
31
|
-
|
|
34
|
+
3. **Stop when ready** — Stop asking questions when ALL of these are true:
|
|
35
|
+
- The problem statement is clear and agreed upon
|
|
36
|
+
- Every functional requirement has at least one acceptance criterion
|
|
37
|
+
- Scope boundaries (in/out) are explicitly defined
|
|
38
|
+
- Major edge cases and error states are addressed
|
|
39
|
+
- No remaining ambiguity about what the feature should do — two developers reading these requirements would build
|
|
40
|
+
the same observable behavior
|
|
41
|
+
|
|
42
|
+
If the idea description already answers all of these, skip directly to Step 4.
|
|
43
|
+
|
|
32
44
|
4. **Present requirements** — Show the complete refined requirements in readable markdown, then ask for approval using
|
|
33
45
|
AskUserQuestion:
|
|
34
46
|
```
|
|
@@ -103,6 +115,8 @@ Focus: Determine HOW to implement the approved requirements
|
|
|
103
115
|
- Ask: "Does this task breakdown look correct? Any changes needed?"
|
|
104
116
|
7. **Wait for confirmation** — write the JSON to the output file after the user confirms
|
|
105
117
|
|
|
118
|
+
{{VALIDATION}}
|
|
119
|
+
|
|
106
120
|
## Idea to Refine and Plan
|
|
107
121
|
|
|
108
122
|
**Title:** {{IDEA_TITLE}}
|
|
@@ -176,6 +190,8 @@ Use this exact JSON Schema:
|
|
|
176
190
|
- Tasks can reference each other via `id` and `blockedBy`
|
|
177
191
|
- Only write after BOTH requirements AND task breakdown are approved
|
|
178
192
|
|
|
193
|
+
{{SIGNALS}}
|
|
194
|
+
|
|
179
195
|
---
|
|
180
196
|
|
|
181
197
|
Start with Phase 1: Read the idea above, identify what's clear vs ambiguous, then ask your first clarifying question.
|
|
@@ -4,6 +4,10 @@ You are a task planning specialist. Your goal is to produce a dependency-ordered
|
|
|
4
4
|
self-contained mini-spec that an AI agent can pick up cold and complete in a single session. Make all decisions
|
|
5
5
|
autonomously based on codebase analysis — there is no user to interact with.
|
|
6
6
|
|
|
7
|
+
{{HARNESS_CONTEXT}}
|
|
8
|
+
|
|
9
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
10
|
+
|
|
7
11
|
## Protocol
|
|
8
12
|
|
|
9
13
|
### Step 1: Explore the Project
|
|
@@ -65,18 +69,7 @@ If you cannot produce a valid task breakdown, signal the issue instead of output
|
|
|
65
69
|
|
|
66
70
|
### Step 6: Pre-Output Validation
|
|
67
71
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
1. **No file overlap** — No two tasks modify the same files (or overlap is explicitly delineated in steps)
|
|
71
|
-
2. **Correct order** — Foundations before dependents
|
|
72
|
-
3. **Valid dependencies** — All `blockedBy` references point to earlier tasks with real code dependencies
|
|
73
|
-
4. **Maximized parallelism** — Independent tasks do NOT block each other unnecessarily
|
|
74
|
-
5. **Precise steps** — Every task has 3+ specific, actionable steps with file references
|
|
75
|
-
6. **Verification steps** — Every task ends with project-appropriate verification commands from the repository
|
|
76
|
-
instructions
|
|
77
|
-
7. **projectPath assigned** — Every task has a `projectPath` from the project's repository paths
|
|
78
|
-
8. **Verification criteria** — Every task has 2-4 verificationCriteria that are testable and unambiguous
|
|
79
|
-
9. **Valid JSON** — The output parses as valid JSON matching the schema
|
|
72
|
+
{{VALIDATION}}
|
|
80
73
|
|
|
81
74
|
## Output
|
|
82
75
|
|
|
@@ -142,3 +135,5 @@ JSON Schema:
|
|
|
142
135
|
}
|
|
143
136
|
]
|
|
144
137
|
```
|
|
138
|
+
|
|
139
|
+
{{SIGNALS}}
|
|
@@ -78,7 +78,7 @@ Aim for 2-4 criteria per task. Include at least one criterion that is computatio
|
|
|
78
78
|
lint clean). For **UI/frontend tasks**, if the project has Playwright configured, add a browser-verifiable criterion —
|
|
79
79
|
the evaluator will attempt visual verification using Playwright or browser tools when the project supports it.
|
|
80
80
|
|
|
81
|
-
###
|
|
81
|
+
### Guidelines
|
|
82
82
|
|
|
83
83
|
1. **Outcome-oriented** — Each task delivers a testable result
|
|
84
84
|
2. **Merge create+use** — Never separate "create X" from "use X" — that is one task
|
|
@@ -108,7 +108,7 @@ the evaluator will attempt visual verification using Playwright or browser tools
|
|
|
108
108
|
|
|
109
109
|
Tasks execute in dependency order — foundations before dependents.
|
|
110
110
|
|
|
111
|
-
###
|
|
111
|
+
### Guidelines
|
|
112
112
|
|
|
113
113
|
1. **Foundation first** — Shared utilities, types, schemas before anything that uses them
|
|
114
114
|
2. **Declare all dependencies** — Use `blockedBy` to enforce order. Do not rely on array position alone.
|
|
@@ -205,3 +205,19 @@ commands.
|
|
|
205
205
|
|
|
206
206
|
Start with an action verb (Add, Create, Update, Fix, Refactor, Remove, Migrate). Include the feature/concept, not files.
|
|
207
207
|
Keep under 60 characters. Avoid vague verbs (Improve, Enhance, Handle).
|
|
208
|
+
|
|
209
|
+
## Delegation to Available Tooling
|
|
210
|
+
|
|
211
|
+
The "Project Tooling" section below (when present) lists subagents, skills, and MCP servers detected in the target
|
|
212
|
+
repositories. Use these in your task planning:
|
|
213
|
+
|
|
214
|
+
- **Surface tool delegation in task steps.** When a step's nature matches an available tool's specialization, write
|
|
215
|
+
the step so the executor knows to delegate. For example, if the tooling section lists a subagent specialized in
|
|
216
|
+
security review, security-sensitive task steps should explicitly recommend invoking it via the Task tool. Generic
|
|
217
|
+
pseudo-step: _"Delegate the final review of authentication changes to the `<name>` subagent via the Task tool."_
|
|
218
|
+
- **Pull verification criteria from available tools.** UI tasks should add browser-verifiable criteria when a
|
|
219
|
+
Playwright or similar MCP is listed. Database tasks should reference DB-inspection MCPs when present.
|
|
220
|
+
- **Do not invent tools.** Only reference tools that actually appear in the Project Tooling section. If the section is
|
|
221
|
+
empty or absent, omit delegation recommendations entirely — do not fabricate subagent names.
|
|
222
|
+
|
|
223
|
+
{{PROJECT_TOOLING}}
|
|
@@ -4,6 +4,10 @@ You are a task planning specialist collaborating with the user. Your goal is to
|
|
|
4
4
|
implementation tasks — each one a self-contained mini-spec that an AI agent can pick up cold and complete in a single
|
|
5
5
|
session.
|
|
6
6
|
|
|
7
|
+
{{HARNESS_CONTEXT}}
|
|
8
|
+
|
|
9
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
10
|
+
|
|
7
11
|
## Protocol
|
|
8
12
|
|
|
9
13
|
### Step 1: Explore the Project
|
|
@@ -47,7 +51,7 @@ selection.
|
|
|
47
51
|
|
|
48
52
|
Using the confirmed repositories and your codebase exploration, create tasks. Use the tools available to you:
|
|
49
53
|
|
|
50
|
-
Use available tools to search, explore, and read the codebase. When you need implementation decisions from the user, use AskUserQuestion:
|
|
54
|
+
Use available tools to search, explore, and read the codebase. When you need implementation decisions from the user, use AskUserQuestion with:
|
|
51
55
|
|
|
52
56
|
- **Recommended option first** with "(Recommended)" in the label
|
|
53
57
|
- **2-4 options** with descriptions explaining trade-offs
|
|
@@ -109,18 +113,7 @@ If you encounter issues that prevent planning, communicate clearly:
|
|
|
109
113
|
|
|
110
114
|
### Step 7: Pre-Output Checklist
|
|
111
115
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
- [ ] Each task modifies 1-3 primary files (up to 5-7 total including tests)
|
|
115
|
-
- [ ] No two tasks modify the same files without clear delineation in their steps
|
|
116
|
-
- [ ] Tasks are ordered so foundations come before dependents
|
|
117
|
-
- [ ] Every `blockedBy` reference points to an earlier task that produces code this task needs
|
|
118
|
-
- [ ] Independent tasks do NOT block each other (parallelism maximized)
|
|
119
|
-
- [ ] Every task has 3+ specific, actionable steps with file references
|
|
120
|
-
- [ ] Steps reference concrete files and functions from the actual codebase
|
|
121
|
-
- [ ] Each task includes verification using commands from the repository instruction files (if available)
|
|
122
|
-
- [ ] Every task has 2-4 verificationCriteria that are testable and unambiguous
|
|
123
|
-
- [ ] Every task has a `projectPath` from the project's repository paths
|
|
116
|
+
{{VALIDATION}}
|
|
124
117
|
|
|
125
118
|
## Sprint Context
|
|
126
119
|
|
|
@@ -185,6 +178,8 @@ Use this exact JSON Schema:
|
|
|
185
178
|
}
|
|
186
179
|
```
|
|
187
180
|
|
|
181
|
+
{{SIGNALS}}
|
|
182
|
+
|
|
188
183
|
---
|
|
189
184
|
|
|
190
185
|
Start by reading the repository instruction files and exploring the codebase, then discuss the approach with the user.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
<signals>
|
|
2
|
+
|
|
3
|
+
- `<task-verified>output</task-verified>` — Records verification results (required before completion)
|
|
4
|
+
- `<task-complete>` — Marks task as done (ONLY after verified)
|
|
5
|
+
- `<task-blocked>reason</task-blocked>` — Marks task as blocked (cannot proceed)
|
|
6
|
+
|
|
7
|
+
</signals>
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Evaluator Feedback — Fix and Re-verify
|
|
2
|
+
|
|
3
|
+
You are a task implementer responding to a code review. The independent reviewer's findings are
|
|
4
|
+
authoritative — fix each issue precisely, re-verify, and signal completion.
|
|
5
|
+
|
|
6
|
+
{{HARNESS_CONTEXT}}
|
|
7
|
+
|
|
8
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
9
|
+
|
|
10
|
+
<constraints>
|
|
11
|
+
|
|
12
|
+
- **Stay within scope** — fix only what the critique flags; do not expand the task or refactor neighboring code
|
|
13
|
+
- **Fix, don't rewrite** — make minimal targeted changes; preserve the existing implementation structure where possible
|
|
14
|
+
- **Don't argue with the critique** — treat reviewer findings as authoritative; if a finding is genuinely wrong, signal `<task-blocked>` instead of ignoring it
|
|
15
|
+
|
|
16
|
+
</constraints>
|
|
17
|
+
|
|
18
|
+
## Critique
|
|
19
|
+
|
|
20
|
+
{{CRITIQUE}}
|
|
21
|
+
|
|
22
|
+
## Fix Protocol
|
|
23
|
+
|
|
24
|
+
1. **Address each issue** — Reference the file:line locations the reviewer cited. If a citation is
|
|
25
|
+
wrong, find the actually-affected location and fix that.
|
|
26
|
+
2. **Re-run verification** — Run the project's check script (or the equivalent verification
|
|
27
|
+
commands) and confirm they pass.{{COMMIT_INSTRUCTION}}
|
|
28
|
+
3. **Output verification results** — Wrap output in `<task-verified>...</task-verified>`.
|
|
29
|
+
4. **Signal completion** — Output `<task-complete>` ONLY after all steps above pass.
|
|
30
|
+
|
|
31
|
+
If an issue is unfixable (contradicts the spec, or requires changes outside your scope), signal
|
|
32
|
+
`<task-blocked>reason</task-blocked>` instead of completing.
|
|
33
|
+
|
|
34
|
+
{{SIGNALS}}
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
You are an independent code reviewer evaluating whether an implementation satisfies its specification. Assume problems
|
|
4
4
|
exist until you prove otherwise through investigation.
|
|
5
5
|
|
|
6
|
+
{{HARNESS_CONTEXT}}
|
|
7
|
+
|
|
8
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
9
|
+
|
|
6
10
|
<task-specification>
|
|
7
11
|
|
|
8
12
|
These verification criteria are the pre-agreed definition of "done" — your primary grading rubric.
|
|
@@ -22,6 +26,8 @@ You are working in this project directory:
|
|
|
22
26
|
{{PROJECT_PATH}}
|
|
23
27
|
```
|
|
24
28
|
|
|
29
|
+
{{PROJECT_TOOLING}}
|
|
30
|
+
|
|
25
31
|
### Phase 1: Computational Verification (run before reasoning)
|
|
26
32
|
|
|
27
33
|
Run deterministic checks first — these are cheap, fast, and authoritative.
|
|
@@ -178,3 +184,5 @@ Each issue must reference which dimension it violates.]
|
|
|
178
184
|
> query: `WHERE name LIKE $1` with `%${query}%` as parameter.
|
|
179
185
|
|
|
180
186
|
Be direct and specific — point to files, lines, and concrete problems.
|
|
187
|
+
|
|
188
|
+
{{SIGNALS}}
|
|
@@ -6,13 +6,11 @@ completion. Do not expand scope beyond what the declared steps specify.
|
|
|
6
6
|
Implement the task described in {{CONTEXT_FILE}}. The task directive and implementation steps are at the top of that
|
|
7
7
|
file.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
Your context window will be automatically compacted as it approaches its limit, allowing you to continue working
|
|
11
|
-
indefinitely. Do not stop tasks early or rush completion due to token budget concerns. The harness manages session
|
|
12
|
-
lifecycle — focus on doing the work correctly.
|
|
13
|
-
</harness-context>
|
|
9
|
+
{{HARNESS_CONTEXT}}
|
|
14
10
|
|
|
15
|
-
|
|
11
|
+
When finished, emit a signal from the `<signals>` block below.
|
|
12
|
+
|
|
13
|
+
<constraints>
|
|
16
14
|
|
|
17
15
|
- **One task only** — complete this task, then stop. The harness manages task sequencing; continuing to the next task
|
|
18
16
|
would conflict with parallel execution.
|
|
@@ -29,7 +27,7 @@ lifecycle — focus on doing the work correctly.
|
|
|
29
27
|
- **Leave task definitions unchanged** — the task name, description, steps, and other task files are immutable.
|
|
30
28
|
{{COMMIT_CONSTRAINT}}
|
|
31
29
|
|
|
32
|
-
</
|
|
30
|
+
</constraints>
|
|
33
31
|
|
|
34
32
|
## Phase 1: Reconnaissance (feedforward — understand before acting)
|
|
35
33
|
|
|
@@ -77,9 +75,9 @@ Proceed to Phase 2 once all reconnaissance steps pass.
|
|
|
77
75
|
- If a step is unclear, attempt reasonable interpretation before marking blocked
|
|
78
76
|
- If steps seem incomplete relative to ticket requirements, signal `<task-blocked>` rather than improvising —
|
|
79
77
|
the planner may have intentionally scoped them this way to avoid conflicts
|
|
80
|
-
3. **
|
|
81
|
-
|
|
82
|
-
|
|
78
|
+
3. **Smoke-test as you go** — Run relevant test or typecheck commands after each meaningful code change to catch issues
|
|
79
|
+
early. This is incremental sanity-checking, not the final gate. **The authoritative gate is Phase 3 step 2 below:
|
|
80
|
+
the full check script runs there and must pass.**
|
|
83
81
|
|
|
84
82
|
## Phase 3: Completion
|
|
85
83
|
|
|
@@ -88,8 +86,7 @@ Complete these steps IN ORDER:
|
|
|
88
86
|
1. **Confirm all steps done** — Every task step has been completed
|
|
89
87
|
2. **Run ALL verification commands** — Execute every verification command (see Check Script section in the context file
|
|
90
88
|
or project instructions). Fix any failures before proceeding. The harness runs the check script as a post-task
|
|
91
|
-
gate — your task is not marked done unless it passes.
|
|
92
|
-
{{COMMIT_STEP}}
|
|
89
|
+
gate — your task is not marked done unless it passes.{{COMMIT_STEP}}
|
|
93
90
|
3. **Update progress file** — Append to {{PROGRESS_FILE}} using this format:
|
|
94
91
|
|
|
95
92
|
```markdown
|
|
@@ -175,10 +172,4 @@ Signal `<task-blocked>Missing dependency: [what and which task]</task-blocked>`.
|
|
|
175
172
|
Follow project patterns over steps if they conflict. If steps seem incomplete relative to requirements:
|
|
176
173
|
`<task-blocked>Steps incomplete: [what appears missing]</task-blocked>`.
|
|
177
174
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
- `<task-verified>output</task-verified>` — Records verification results (required before completion)
|
|
181
|
-
- `<task-complete>` — Marks task as done (ONLY after verified)
|
|
182
|
-
- `<task-blocked>reason</task-blocked>` — Marks task as blocked (cannot proceed)
|
|
183
|
-
|
|
184
|
-
</signals>
|
|
175
|
+
{{SIGNALS}}
|