ralphctl 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -86
- package/dist/{add-SEDQ3VK7.mjs → add-DWNLZQ7Q.mjs} +4 -4
- package/dist/{add-TGJTRHIF.mjs → add-K7LNOYQ4.mjs} +3 -3
- package/dist/{chunk-LG6B7QVO.mjs → chunk-7TBO6GOT.mjs} +1 -1
- package/dist/{chunk-ZDEVRTGY.mjs → chunk-GLDPHKEW.mjs} +9 -0
- package/dist/{chunk-KPTPKLXY.mjs → chunk-ITRZMBLJ.mjs} +1 -1
- package/dist/{chunk-Q3VWJARJ.mjs → chunk-LAERLCL5.mjs} +2 -2
- package/dist/{chunk-AXNZMHFQ.mjs → chunk-ORVGM6EV.mjs} +80 -18
- package/dist/{chunk-XPDI4SYI.mjs → chunk-QYF7QIZJ.mjs} +3 -3
- package/dist/{chunk-XQHEKKDN.mjs → chunk-V4ZUDZCG.mjs} +1 -1
- package/dist/cli.mjs +105 -16
- package/dist/{create-DJHCP7LN.mjs → create-5MILNF7E.mjs} +3 -3
- package/dist/{handle-CCTBNAJZ.mjs → handle-2BACSJLR.mjs} +1 -1
- package/dist/{project-ZYGNPVGL.mjs → project-XC7AXA4B.mjs} +2 -2
- package/dist/prompts/ideate-auto.md +15 -5
- package/dist/prompts/ideate.md +28 -12
- package/dist/prompts/plan-auto.md +27 -17
- package/dist/prompts/plan-common.md +67 -22
- package/dist/prompts/plan-interactive.md +26 -27
- package/dist/prompts/task-evaluation.md +149 -23
- package/dist/prompts/task-execution.md +60 -37
- package/dist/prompts/ticket-refine.md +25 -21
- package/dist/{resolver-L52KR4GY.mjs → resolver-CFY6DIOP.mjs} +2 -2
- package/dist/{sprint-LUXAV3Q3.mjs → sprint-F4VRAEWZ.mjs} +2 -2
- package/dist/{wizard-TFJXEYD2.mjs → wizard-RCQ4QQOL.mjs} +6 -6
- package/package.json +6 -6
- package/schemas/task-import.schema.json +7 -0
- package/schemas/tasks.schema.json +8 -0
package/README.md
CHANGED
|
@@ -4,53 +4,74 @@
|
|
|
4
4
|
[License](./LICENSE)
|
|
5
5
|
[TypeScript](https://www.typescriptlang.org/)
|
|
6
6
|
[Node.js](https://nodejs.org/)
|
|
7
|
-
[](https://prettier.io/)
|
|
8
|
-
[](https://eslint.org/)
|
|
9
7
|
[Contributing](./CONTRIBUTING.md)
|
|
10
8
|
[Claude Code](https://docs.anthropic.com/en/docs/claude-code)
|
|
11
9
|
[GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli)
|
|
12
|
-
[](https://github.com/lukas-grigis/ralphctl)
|
|
13
10
|
|
|
14
11
|
```
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
12
|
+
██████╗ █████╗ ██╗ ██████╗ ██╗ ██╗ ██████╗████████╗██╗
|
|
13
|
+
██╔══██╗██╔══██╗██║ ██╔══██╗██║ ██║██╔════╝╚══██╔══╝██║
|
|
14
|
+
██████╔╝███████║██║ ██████╔╝███████║██║ ██║ ██║
|
|
15
|
+
██╔══██╗██╔══██║██║ ██╔═══╝ ██╔══██║██║ ██║ ██║
|
|
16
|
+
██║ ██║██║ ██║███████╗██║ ██║ ██║╚██████╗ ██║ ███████╗
|
|
17
|
+
╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚══════╝
|
|
21
18
|
```
|
|
22
19
|
|
|
23
|
-
**Agent harness for long-running AI coding tasks —
|
|
20
|
+
**Agent harness for long-running AI coding tasks —
|
|
21
|
+
orchestrates [Claude Code](https://docs.anthropic.com/en/docs/claude-code) & [GitHub Copilot](https://docs.github.com/en/copilot/github-copilot-in-the-cli)
|
|
22
|
+
across repositories.**
|
|
24
23
|
|
|
25
24
|
> _"I'm helping!"_ — Ralph Wiggum
|
|
26
25
|
|
|
27
26
|
> [!NOTE]
|
|
28
27
|
> **Early access.** RalphCTL is under active development. Things work, but expect rough edges and breaking changes
|
|
29
|
-
> before 1.0.
|
|
28
|
+
> before 1.0.
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Why ralphctl?
|
|
33
|
+
|
|
34
|
+
AI coding agents are powerful but lose context on long tasks, need babysitting when things break, and have no way to
|
|
35
|
+
coordinate changes across multiple repositories. RalphCTL decomposes your work into dependency-ordered tasks, runs each
|
|
36
|
+
one through a [generator-evaluator loop](https://www.anthropic.com/engineering/harness-design-long-running-apps) that
|
|
37
|
+
catches issues before moving on, and persists context across sessions so nothing gets lost. You describe what to build —
|
|
38
|
+
ralphctl handles the rest.
|
|
35
39
|
|
|
36
40
|
---
|
|
37
41
|
|
|
38
|
-
##
|
|
42
|
+
## How It Works
|
|
39
43
|
|
|
40
|
-
```bash
|
|
41
|
-
npm install -g ralphctl
|
|
42
44
|
```
|
|
45
|
+
You describe what to build ralphctl handles the rest
|
|
46
|
+
───────────────────────── ─────────────────────────────────
|
|
47
|
+
┌──────────┐ ┌──────────┐ ┌────────┐ ┌──────┐ ┌─────────┐
|
|
48
|
+
│ Create │──>│ Add │───────>│ Refine │──>│ Plan │──>│ Execute │
|
|
49
|
+
│ Sprint │ │ Tickets │ │ (WHAT) │ │(HOW) │ │ Loop │
|
|
50
|
+
└──────────┘ └──────────┘ └────────┘ └──────┘ └─────────┘
|
|
51
|
+
│ │ │
|
|
52
|
+
AI clarifies AI generates AI implements
|
|
53
|
+
requirements task graph + AI reviews
|
|
54
|
+
with you from specs each task
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
- **Dependency-ordered execution** — tasks run in the right sequence, one per repo at a time, with parallel execution
|
|
58
|
+
where possible
|
|
59
|
+
- **Generator-evaluator cycle** — an independent AI reviewer checks each task against its spec; if it fails, the
|
|
60
|
+
generator gets feedback and iterates
|
|
61
|
+
- **Context persistence** — sprint state, progress history, and task context survive across sessions; interrupted work
|
|
62
|
+
resumes where it left off
|
|
43
63
|
|
|
44
|
-
|
|
64
|
+
---
|
|
45
65
|
|
|
46
|
-
|
|
66
|
+
## Quick Start
|
|
47
67
|
|
|
48
|
-
|
|
49
|
-
-
|
|
50
|
-
|
|
51
|
-
or [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli) installed and authenticated
|
|
68
|
+
```bash
|
|
69
|
+
npm install -g ralphctl
|
|
70
|
+
```
|
|
52
71
|
|
|
53
|
-
|
|
72
|
+
Requires [Node.js](https://nodejs.org/) >= 24, [Git](https://git-scm.com/), and
|
|
73
|
+
either [Claude CLI](https://docs.anthropic.com/en/docs/claude-code)
|
|
74
|
+
or [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli) installed and authenticated.
|
|
54
75
|
|
|
55
76
|
```bash
|
|
56
77
|
# 1. Register a project (points to your repo)
|
|
@@ -68,36 +89,65 @@ ralphctl sprint plan
|
|
|
68
89
|
ralphctl sprint start
|
|
69
90
|
```
|
|
70
91
|
|
|
71
|
-
Or just run `ralphctl` with no arguments for an interactive menu that walks you through everything.
|
|
92
|
+
Or run `ralphctl` with no arguments for an interactive menu that walks you through everything.
|
|
72
93
|
|
|
73
94
|
---
|
|
74
95
|
|
|
75
|
-
##
|
|
96
|
+
## Features
|
|
76
97
|
|
|
77
|
-
-
|
|
78
|
-
-
|
|
79
|
-
|
|
80
|
-
-
|
|
81
|
-
-
|
|
82
|
-
-
|
|
83
|
-
|
|
98
|
+
- **Break big tickets into small tasks** — dependency-ordered so they execute in the right sequence
|
|
99
|
+
- **Catch mistakes before they compound** — independent AI review after each task, iterating until quality passes or
|
|
100
|
+
budget is exhausted
|
|
101
|
+
- **Coordinate across repositories** — one sprint can span multiple repos with automatic dependency tracking
|
|
102
|
+
- **Run tasks in parallel** — one per repo, with rate-limit backoff and automatic session resume
|
|
103
|
+
- **Separate the what from the how** — AI clarifies requirements first, then generates implementation tasks, with human
|
|
104
|
+
approval gates
|
|
105
|
+
- **Pick up where you left off** — full state persistence across sessions; interrupted work resumes automatically
|
|
106
|
+
- **Pair or let it run** — work alongside your AI agent interactively, or let it execute unattended
|
|
107
|
+
- **Zero-memorization start** — run `ralphctl` with no args for a guided menu
|
|
84
108
|
|
|
85
109
|
---
|
|
86
110
|
|
|
87
|
-
##
|
|
111
|
+
## Configuration
|
|
112
|
+
|
|
113
|
+
RalphCTL supports **Claude Code** and **GitHub Copilot** as AI backends.
|
|
88
114
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
-
|
|
95
|
-
|
|
96
|
-
|
|
115
|
+
```bash
|
|
116
|
+
ralphctl config set provider claude # Use Claude Code
|
|
117
|
+
ralphctl config set provider copilot # Use GitHub Copilot
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
If no provider is set, ralphctl prompts you to choose one the first time you run an AI command. The CLI for the provider you select must be in your PATH and authenticated.
|
|
121
|
+
|
|
122
|
+
<details>
|
|
123
|
+
<summary>Provider differences</summary>
|
|
124
|
+
|
|
125
|
+
| Feature | Claude Code | GitHub Copilot |
|
|
126
|
+
| --------------------------- | ------------------------------------ | -------------------------------------------------------------------- |
|
|
127
|
+
| Status | GA | Public preview |
|
|
128
|
+
| Headless execution | `-p --output-format json` | `-p --output-format json --autopilot --no-ask-user` |
|
|
129
|
+
| Session IDs | In JSON output (`session_id`) | In JSON output (`session_id`), `--share` file as fallback |
|
|
130
|
+
| Session resume (`--resume`) | Full support | Full support |
|
|
131
|
+
| Per-tool permissions | Settings files + `--permission-mode` | `--allow-all-tools` (all-or-nothing by default) |
|
|
132
|
+
| Fine-grained tool control | `allow`/`deny` in settings files | `--allow-tool`, `--deny-tool` flags (not yet used) |
|
|
133
|
+
| Rate limit detection | Validated patterns | Borrowed from Claude — not yet validated against real Copilot errors |
|
|
134
|
+
|
|
135
|
+
</details>
|
|
97
136
|
|
|
98
137
|
---
|
|
99
138
|
|
|
100
|
-
##
|
|
139
|
+
## Data Directory
|
|
140
|
+
|
|
141
|
+
All data lives in `~/.ralphctl/` by default. Override with:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
export RALPHCTL_ROOT="/path/to/custom/data-dir"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
<details>
|
|
150
|
+
<summary><strong>CLI Command Reference</strong></summary>
|
|
101
151
|
|
|
102
152
|
### Getting Started
|
|
103
153
|
|
|
@@ -135,7 +185,7 @@ Or just run `ralphctl` with no arguments for an interactive menu that walks you
|
|
|
135
185
|
| ------------------------ | --------------------------------- |
|
|
136
186
|
| `ralphctl sprint start` | Execute tasks with AI |
|
|
137
187
|
| `ralphctl sprint health` | Diagnose blockers and stale tasks |
|
|
138
|
-
| `ralphctl
|
|
188
|
+
| `ralphctl status` | Sprint overview with progress bar |
|
|
139
189
|
| `ralphctl task list` | List tasks in the current sprint |
|
|
140
190
|
| `ralphctl task next` | Show the next unblocked task |
|
|
141
191
|
| `ralphctl sprint close` | Close an active sprint |
|
|
@@ -143,54 +193,22 @@ Or just run `ralphctl` with no arguments for an interactive menu that walks you
|
|
|
143
193
|
|
|
144
194
|
Run `ralphctl <command> --help` for details on any command.
|
|
145
195
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
## AI Provider Configuration
|
|
149
|
-
|
|
150
|
-
RalphCTL supports **Claude Code** and **GitHub Copilot** as AI backends. Both use the same prompt templates and
|
|
151
|
-
workflow.
|
|
152
|
-
|
|
153
|
-
```bash
|
|
154
|
-
ralphctl config set provider claude # Use Claude Code
|
|
155
|
-
ralphctl config set provider copilot # Use GitHub Copilot
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
Auto-prompts on first AI command if not set. Both CLIs must be in your PATH and authenticated.
|
|
159
|
-
|
|
160
|
-
### Provider Differences
|
|
161
|
-
|
|
162
|
-
| Feature | Claude Code | GitHub Copilot |
|
|
163
|
-
| --------------------------- | ------------------------------------ | -------------------------------------------------------------------- |
|
|
164
|
-
| Status | GA | Public preview |
|
|
165
|
-
| Headless execution | `-p --output-format json` | `-p --output-format json --autopilot --no-ask-user` |
|
|
166
|
-
| Session IDs | In JSON output (`session_id`) | In JSON output (`session_id`), `--share` file as fallback |
|
|
167
|
-
| Session resume (`--resume`) | Full support | Full support |
|
|
168
|
-
| Per-tool permissions | Settings files + `--permission-mode` | `--allow-all-tools` (all-or-nothing by default) |
|
|
169
|
-
| Fine-grained tool control | `allow`/`deny` in settings files | `--allow-tool`, `--deny-tool` flags (not yet used) |
|
|
170
|
-
| Rate limit detection | Validated patterns | Borrowed from Claude — not yet validated against real Copilot errors |
|
|
196
|
+
</details>
|
|
171
197
|
|
|
172
198
|
---
|
|
173
199
|
|
|
174
200
|
## Documentation
|
|
175
201
|
|
|
176
|
-
|
|
|
177
|
-
|
|
|
178
|
-
| [
|
|
179
|
-
| [
|
|
180
|
-
| [
|
|
181
|
-
| [
|
|
182
|
-
| [CHANGELOG.md](./CHANGELOG.md) | Version history |
|
|
183
|
-
| [Blog post](https://lukasgrigis.dev/blog/building-ralphctl) | Background and motivation |
|
|
184
|
-
|
|
185
|
-
---
|
|
186
|
-
|
|
187
|
-
## Data Directory
|
|
202
|
+
| Resource | Description |
|
|
203
|
+
| ---------------------------------------------- | ------------------------------------------ |
|
|
204
|
+
| [Architecture](./.claude/docs/ARCHITECTURE.md) | Data models, file storage, error reference |
|
|
205
|
+
| [Requirements](./.claude/docs/REQUIREMENTS.md) | Acceptance criteria and feature checklist |
|
|
206
|
+
| [Contributing](./CONTRIBUTING.md) | Dev setup, code style, PR process |
|
|
207
|
+
| [Changelog](./CHANGELOG.md) | Version history |
|
|
188
208
|
|
|
189
|
-
|
|
209
|
+
**Blog posts:** [Building ralphctl](https://lukasgrigis.dev/blog/building-ralphctl) (backstory) | [From task CLI to agent harness](https://lukasgrigis.dev/blog/ralphctl-agent-harness/) (evaluator deep-dive)
|
|
190
210
|
|
|
191
|
-
|
|
192
|
-
export RALPHCTL_ROOT="/path/to/custom/data-dir"
|
|
193
|
-
```
|
|
211
|
+
**Further reading:** [Harness Engineering for Coding Agent Users](https://martinfowler.com/articles/harness-engineering.html) — Martin Fowler (April 2026) | [Harness Design for Long-Running Application Development](https://www.anthropic.com/engineering/harness-design-long-running-apps) — Anthropic Engineering
|
|
194
212
|
|
|
195
213
|
---
|
|
196
214
|
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
import {
|
|
3
3
|
addSingleTicketInteractive,
|
|
4
4
|
ticketAddCommand
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-QYF7QIZJ.mjs";
|
|
6
6
|
import "./chunk-7TG3EAQ2.mjs";
|
|
7
|
-
import "./chunk-
|
|
8
|
-
import "./chunk-
|
|
7
|
+
import "./chunk-7TBO6GOT.mjs";
|
|
8
|
+
import "./chunk-ITRZMBLJ.mjs";
|
|
9
9
|
import "./chunk-OEUJDSHY.mjs";
|
|
10
|
-
import "./chunk-
|
|
10
|
+
import "./chunk-GLDPHKEW.mjs";
|
|
11
11
|
import "./chunk-EDJX7TT6.mjs";
|
|
12
12
|
import "./chunk-QBXHAXHI.mjs";
|
|
13
13
|
export {
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
import {
|
|
3
3
|
addCheckScriptToRepository,
|
|
4
4
|
projectAddCommand
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-LAERLCL5.mjs";
|
|
6
6
|
import "./chunk-7LZ6GOGN.mjs";
|
|
7
7
|
import "./chunk-7TG3EAQ2.mjs";
|
|
8
|
-
import "./chunk-
|
|
8
|
+
import "./chunk-7TBO6GOT.mjs";
|
|
9
9
|
import "./chunk-OEUJDSHY.mjs";
|
|
10
|
-
import "./chunk-
|
|
10
|
+
import "./chunk-GLDPHKEW.mjs";
|
|
11
11
|
import "./chunk-EDJX7TT6.mjs";
|
|
12
12
|
import "./chunk-QBXHAXHI.mjs";
|
|
13
13
|
export {
|
|
@@ -53,13 +53,20 @@ function getTasksFilePath(sprintId) {
|
|
|
53
53
|
function getProgressFilePath(sprintId) {
|
|
54
54
|
return join(getSprintDir(sprintId), "progress.md");
|
|
55
55
|
}
|
|
56
|
+
/**
 * Guard that a value is safe to use as a single path segment.
 *
 * A segment is rejected when it is not a string, is empty, is exactly "."
 * (which would collapse onto the parent directory once joined), or contains
 * a path separator ("/" or "\"), a ".." sequence, or a NUL byte.
 *
 * @param {string} segment - Candidate path segment (e.g. a ticket ID).
 * @param {string} label - Human-readable name of the value, used in the error message.
 * @returns {void}
 * @throws {Error} When the segment is unsafe.
 */
function assertSafeSegment(segment, label) {
  const forbiddenTokens = ["/", "\\", "..", "\0"];
  // Check the type first: calling .includes() on a number would raise an
  // unrelated TypeError, and an array such as ["x"] would pass the substring
  // checks below without being a usable path segment at all.
  const isUnsafe =
    typeof segment !== "string" ||
    segment === "" ||
    segment === "." ||
    forbiddenTokens.some((token) => segment.includes(token));
  if (isUnsafe) {
    // String() keeps the message well-defined for any value (e.g. symbols).
    throw new Error(`Path traversal detected in ${label}: ${String(segment)}`);
  }
}
|
|
56
61
|
function getRefinementDir(sprintId, ticketId) {
|
|
62
|
+
assertSafeSegment(ticketId, "ticket ID");
|
|
57
63
|
return join(getSprintDir(sprintId), "refinement", ticketId);
|
|
58
64
|
}
|
|
59
65
|
function getPlanningDir(sprintId) {
|
|
60
66
|
return join(getSprintDir(sprintId), "planning");
|
|
61
67
|
}
|
|
62
68
|
function getIdeateDir(sprintId, ticketId) {
|
|
69
|
+
assertSafeSegment(ticketId, "ticket ID");
|
|
63
70
|
return join(getSprintDir(sprintId), "ideation", ticketId);
|
|
64
71
|
}
|
|
65
72
|
function getSchemaPath(schemaName) {
|
|
@@ -233,6 +240,7 @@ var TaskSchema = z.object({
|
|
|
233
240
|
name: z.string().min(1),
|
|
234
241
|
description: z.string().optional(),
|
|
235
242
|
steps: z.array(z.string()).default([]),
|
|
243
|
+
verificationCriteria: z.array(z.string()).default([]),
|
|
236
244
|
status: TaskStatusSchema.default("todo"),
|
|
237
245
|
order: z.number().int().positive(),
|
|
238
246
|
ticketId: z.string().optional(),
|
|
@@ -257,6 +265,7 @@ var ImportTaskSchema = z.object({
|
|
|
257
265
|
// Required
|
|
258
266
|
description: z.string().optional(),
|
|
259
267
|
steps: z.array(z.string()).optional(),
|
|
268
|
+
verificationCriteria: z.array(z.string()).optional(),
|
|
260
269
|
ticketId: z.string().optional(),
|
|
261
270
|
blockedBy: z.array(z.string()).optional(),
|
|
262
271
|
projectPath: z.string().min(1)
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "./chunk-7TG3EAQ2.mjs";
|
|
9
9
|
import {
|
|
10
10
|
createProject
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-7TBO6GOT.mjs";
|
|
12
12
|
import {
|
|
13
13
|
ensureError,
|
|
14
14
|
wrapAsync
|
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
import {
|
|
17
17
|
expandTilde,
|
|
18
18
|
validateProjectPath
|
|
19
|
-
} from "./chunk-
|
|
19
|
+
} from "./chunk-GLDPHKEW.mjs";
|
|
20
20
|
import {
|
|
21
21
|
IOError,
|
|
22
22
|
ProjectExistsError
|
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
getPendingRequirements,
|
|
12
12
|
groupTicketsByProject,
|
|
13
13
|
listTickets
|
|
14
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-QYF7QIZJ.mjs";
|
|
15
15
|
import {
|
|
16
16
|
EXIT_ALL_BLOCKED,
|
|
17
17
|
EXIT_ERROR,
|
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
import {
|
|
24
24
|
getProject,
|
|
25
25
|
listProjects
|
|
26
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-7TBO6GOT.mjs";
|
|
27
27
|
import {
|
|
28
28
|
activateSprint,
|
|
29
29
|
assertSprintStatus,
|
|
@@ -40,7 +40,7 @@ import {
|
|
|
40
40
|
setAiProvider,
|
|
41
41
|
summarizeProgressForContext,
|
|
42
42
|
withFileLock
|
|
43
|
-
} from "./chunk-
|
|
43
|
+
} from "./chunk-ITRZMBLJ.mjs";
|
|
44
44
|
import {
|
|
45
45
|
ensureError,
|
|
46
46
|
unwrapOrThrow,
|
|
@@ -61,7 +61,7 @@ import {
|
|
|
61
61
|
getTasksFilePath,
|
|
62
62
|
readValidatedJson,
|
|
63
63
|
writeValidatedJson
|
|
64
|
-
} from "./chunk-
|
|
64
|
+
} from "./chunk-GLDPHKEW.mjs";
|
|
65
65
|
import {
|
|
66
66
|
DependencyCycleError,
|
|
67
67
|
IOError,
|
|
@@ -162,10 +162,13 @@ function buildEvaluatorPrompt(ctx) {
|
|
|
162
162
|
const stepsSection = ctx.taskSteps.length > 0 ? `
|
|
163
163
|
**Implementation Steps:**
|
|
164
164
|
${ctx.taskSteps.map((s) => `- ${s}`).join("\n")}` : "";
|
|
165
|
+
const criteriaSection = ctx.verificationCriteria.length > 0 ? `
|
|
166
|
+
**Verification Criteria:**
|
|
167
|
+
${ctx.verificationCriteria.map((c) => `- ${c}`).join("\n")}` : "";
|
|
165
168
|
const checkSection = ctx.checkScriptSection ? `
|
|
166
169
|
|
|
167
170
|
${ctx.checkScriptSection}` : "";
|
|
168
|
-
return template.replaceAll("{{TASK_NAME}}", ctx.taskName).replace("{{TASK_DESCRIPTION_SECTION}}", descriptionSection).replace("{{TASK_STEPS_SECTION}}", stepsSection).replace("{{PROJECT_PATH}}", ctx.projectPath).replace("{{CHECK_SCRIPT_SECTION}}", checkSection);
|
|
171
|
+
return template.replaceAll("{{TASK_NAME}}", ctx.taskName).replace("{{TASK_DESCRIPTION_SECTION}}", descriptionSection).replace("{{TASK_STEPS_SECTION}}", stepsSection).replace("{{VERIFICATION_CRITERIA_SECTION}}", criteriaSection).replace("{{PROJECT_PATH}}", ctx.projectPath).replace("{{CHECK_SCRIPT_SECTION}}", checkSection);
|
|
169
172
|
}
|
|
170
173
|
|
|
171
174
|
// src/utils/requirements-export.ts
|
|
@@ -1087,6 +1090,7 @@ async function addTask(input3, sprintId) {
|
|
|
1087
1090
|
name: input3.name,
|
|
1088
1091
|
description: input3.description,
|
|
1089
1092
|
steps: input3.steps ?? [],
|
|
1093
|
+
verificationCriteria: input3.verificationCriteria ?? [],
|
|
1090
1094
|
status: "todo",
|
|
1091
1095
|
order: maxOrder + 1,
|
|
1092
1096
|
ticketId: input3.ticketId,
|
|
@@ -1320,6 +1324,7 @@ function validateImportTasks(importTasks2, existingTasks, ticketIds) {
|
|
|
1320
1324
|
name: t.name,
|
|
1321
1325
|
description: void 0,
|
|
1322
1326
|
steps: [],
|
|
1327
|
+
verificationCriteria: [],
|
|
1323
1328
|
status: "todo",
|
|
1324
1329
|
order: existingTasks.length + i + 1,
|
|
1325
1330
|
ticketId: void 0,
|
|
@@ -1355,7 +1360,7 @@ async function selectProject(message = "Select project:") {
|
|
|
1355
1360
|
default: true
|
|
1356
1361
|
});
|
|
1357
1362
|
if (create) {
|
|
1358
|
-
const { projectAddCommand } = await import("./add-
|
|
1363
|
+
const { projectAddCommand } = await import("./add-K7LNOYQ4.mjs");
|
|
1359
1364
|
await projectAddCommand({ interactive: true });
|
|
1360
1365
|
const updated = await listProjects();
|
|
1361
1366
|
if (updated.length === 0) return null;
|
|
@@ -1428,7 +1433,7 @@ async function selectSprint(message = "Select sprint:", filter) {
|
|
|
1428
1433
|
default: true
|
|
1429
1434
|
});
|
|
1430
1435
|
if (create) {
|
|
1431
|
-
const { sprintCreateCommand } = await import("./create-
|
|
1436
|
+
const { sprintCreateCommand } = await import("./create-5MILNF7E.mjs");
|
|
1432
1437
|
await sprintCreateCommand({ interactive: true });
|
|
1433
1438
|
const updated = await listSprints();
|
|
1434
1439
|
const refiltered = filter ? updated.filter((s) => filter.includes(s.status)) : updated;
|
|
@@ -1463,7 +1468,7 @@ async function selectTicket(message = "Select ticket:", filter) {
|
|
|
1463
1468
|
default: true
|
|
1464
1469
|
});
|
|
1465
1470
|
if (create) {
|
|
1466
|
-
const { ticketAddCommand } = await import("./add-
|
|
1471
|
+
const { ticketAddCommand } = await import("./add-DWNLZQ7Q.mjs");
|
|
1467
1472
|
await ticketAddCommand({ interactive: true });
|
|
1468
1473
|
const updated = await listTickets();
|
|
1469
1474
|
const refiltered = filter ? updated.filter(filter) : updated;
|
|
@@ -1658,6 +1663,7 @@ async function importTasksReplace(tasks, sprintId) {
|
|
|
1658
1663
|
name: taskInput.name,
|
|
1659
1664
|
description: taskInput.description,
|
|
1660
1665
|
steps: taskInput.steps ?? [],
|
|
1666
|
+
verificationCriteria: taskInput.verificationCriteria ?? [],
|
|
1661
1667
|
status: "todo",
|
|
1662
1668
|
order: newTasks.length + 1,
|
|
1663
1669
|
ticketId: taskInput.ticketId,
|
|
@@ -2321,6 +2327,16 @@ function formatTask(ctx) {
|
|
|
2321
2327
|
lines.push(`${String(i + 1)}. ${step}`);
|
|
2322
2328
|
});
|
|
2323
2329
|
}
|
|
2330
|
+
if (ctx.task.verificationCriteria.length > 0) {
|
|
2331
|
+
lines.push("");
|
|
2332
|
+
lines.push("## Verification Criteria");
|
|
2333
|
+
lines.push("");
|
|
2334
|
+
lines.push("The task is done when all of the following are true:");
|
|
2335
|
+
lines.push("");
|
|
2336
|
+
ctx.task.verificationCriteria.forEach((criterion) => {
|
|
2337
|
+
lines.push(`- ${criterion}`);
|
|
2338
|
+
});
|
|
2339
|
+
}
|
|
2324
2340
|
return lines.join("\n");
|
|
2325
2341
|
}
|
|
2326
2342
|
function buildFullTaskContext(ctx, progressSummary, gitHistory, checkScript, checkStatus) {
|
|
@@ -2472,30 +2488,53 @@ function getEvaluatorModel(generatorModel, provider) {
|
|
|
2472
2488
|
if (modelLower.includes("sonnet")) return "claude-haiku-4-5";
|
|
2473
2489
|
return "claude-haiku-4-5";
|
|
2474
2490
|
}
|
|
2491
|
+
// Review dimensions looked for in the evaluator output, in reporting order.
var DIMENSION_NAMES = ["correctness", "completeness", "safety", "consistency"];

// One case-insensitive matcher per dimension, derived from DIMENSION_NAMES so
// the two can never drift apart. Each matches a line of the form
//   **<dimension>**: PASS|FAIL <dash> <finding>
// where <dash> is an em dash, an en dash, or a hyphen. Group 1 is the verdict,
// group 2 is the rest of the line (the finding).
// The names are plain letters, so no regex escaping is required.
var DIMENSION_PATTERNS = Object.fromEntries(
  DIMENSION_NAMES.map((name) => [
    name,
    new RegExp(`\\*\\*${name}\\*\\*\\s*:\\s*(PASS|FAIL)\\s*[\\u2014\\u2013-]\\s*(.+)`, "i")
  ])
);

/**
 * Extract per-dimension verdicts from evaluator output.
 *
 * Only the first occurrence of each dimension is used. Dimensions that do not
 * appear in the expected format are omitted from the result.
 *
 * @param {string} output - Raw text to scan.
 * @returns {Array<{dimension: string, passed: boolean, finding: string}>}
 *   One entry per recognised dimension, ordered as in DIMENSION_NAMES.
 */
function parseDimensionScores(output) {
  const scores = [];
  for (const dimension of DIMENSION_NAMES) {
    const match = DIMENSION_PATTERNS[dimension].exec(output);
    if (!match) continue;
    // Both capture groups are non-empty whenever the pattern matches.
    const [, verdict, finding] = match;
    scores.push({
      dimension,
      passed: verdict.toUpperCase() === "PASS",
      finding: finding.trim()
    });
  }
  return scores;
}
|
|
2475
2512
|
function parseEvaluationResult(output) {
|
|
2513
|
+
const dimensions = parseDimensionScores(output);
|
|
2476
2514
|
if (output.includes("<evaluation-passed>")) {
|
|
2477
|
-
return { passed: true, output };
|
|
2515
|
+
return { passed: true, output, dimensions };
|
|
2478
2516
|
}
|
|
2479
2517
|
const failedMatch = /<evaluation-failed>([\s\S]*?)<\/evaluation-failed>/.exec(output);
|
|
2480
2518
|
if (failedMatch) {
|
|
2481
|
-
return { passed: false, output: failedMatch[1]?.trim() ?? output };
|
|
2519
|
+
return { passed: false, output: failedMatch[1]?.trim() ?? output, dimensions };
|
|
2482
2520
|
}
|
|
2483
|
-
return { passed: false, output };
|
|
2521
|
+
return { passed: false, output, dimensions };
|
|
2484
2522
|
}
|
|
2485
2523
|
function buildEvaluatorContext(task, checkScript) {
|
|
2486
|
-
const checkScriptSection = checkScript ? `## Check Script
|
|
2524
|
+
const checkScriptSection = checkScript ? `## Check Script (Computational Gate)
|
|
2487
2525
|
|
|
2488
|
-
|
|
2526
|
+
Run this check script as the **first step** of your review \u2014 it is the same gate the harness uses post-task:
|
|
2489
2527
|
|
|
2490
2528
|
\`\`\`
|
|
2491
2529
|
${checkScript}
|
|
2492
2530
|
\`\`\`
|
|
2493
2531
|
|
|
2494
|
-
|
|
2532
|
+
If this script fails, the implementation fails regardless of code quality. Record the full output.` : null;
|
|
2495
2533
|
return {
|
|
2496
2534
|
taskName: task.name,
|
|
2497
2535
|
taskDescription: task.description ?? "",
|
|
2498
2536
|
taskSteps: task.steps,
|
|
2537
|
+
verificationCriteria: task.verificationCriteria,
|
|
2499
2538
|
projectPath: task.projectPath,
|
|
2500
2539
|
checkScriptSection
|
|
2501
2540
|
};
|
|
@@ -2520,6 +2559,7 @@ async function runEvaluation(task, generatorModel, checkScript, sprintId, provid
|
|
|
2520
2559
|
}
|
|
2521
2560
|
|
|
2522
2561
|
// src/ai/executor.ts
|
|
2562
|
+
var DEFAULT_MAX_TURNS = 200;
|
|
2523
2563
|
function buildProviderArgs(options, provider) {
|
|
2524
2564
|
if (provider.name !== "claude") {
|
|
2525
2565
|
if (options.maxBudgetUsd != null) {
|
|
@@ -2528,6 +2568,9 @@ function buildProviderArgs(options, provider) {
|
|
|
2528
2568
|
if (options.fallbackModel) {
|
|
2529
2569
|
console.log(warning(`--fallback-model is only supported with the Claude provider \u2014 ignored`));
|
|
2530
2570
|
}
|
|
2571
|
+
if (options.maxTurns != null) {
|
|
2572
|
+
console.log(warning(`--max-turns is only supported with the Claude provider \u2014 ignored`));
|
|
2573
|
+
}
|
|
2531
2574
|
return [];
|
|
2532
2575
|
}
|
|
2533
2576
|
const args = [];
|
|
@@ -2537,6 +2580,7 @@ function buildProviderArgs(options, provider) {
|
|
|
2537
2580
|
if (options.fallbackModel) {
|
|
2538
2581
|
args.push("--fallback-model", options.fallbackModel);
|
|
2539
2582
|
}
|
|
2583
|
+
args.push("--max-turns", String(options.maxTurns ?? DEFAULT_MAX_TURNS));
|
|
2540
2584
|
return args;
|
|
2541
2585
|
}
|
|
2542
2586
|
async function executeTask(ctx, options, sprintId, resumeSessionId, provider, checkStatus) {
|
|
@@ -2672,6 +2716,8 @@ async function runEvaluationLoop(params) {
|
|
|
2672
2716
|
const evalCheckScript = getEffectiveCheckScript(project, task.projectPath);
|
|
2673
2717
|
const sprintDir = getSprintDir(sprintId);
|
|
2674
2718
|
let evalResult = await runEvaluation(task, result.model, evalCheckScript, sprintId, provider);
|
|
2719
|
+
let currentSessionId = result.sessionId;
|
|
2720
|
+
let currentModel = result.model;
|
|
2675
2721
|
for (let i = 0; i < evalIterations && !evalResult.passed; i++) {
|
|
2676
2722
|
console.log(warning(`Evaluation failed for ${task.name} (iteration ${String(i + 1)}/${String(evalIterations)})`));
|
|
2677
2723
|
console.log(muted(evalResult.output.slice(0, 500)));
|
|
@@ -2680,12 +2726,16 @@ async function runEvaluationLoop(params) {
|
|
|
2680
2726
|
{
|
|
2681
2727
|
cwd: task.projectPath,
|
|
2682
2728
|
args: ["--add-dir", sprintDir, ...buildProviderArgs(options, provider)],
|
|
2683
|
-
prompt: `The evaluator found issues with your
|
|
2729
|
+
prompt: `The evaluator found issues with your implementation:
|
|
2684
2730
|
|
|
2685
2731
|
${evalResult.output}
|
|
2686
2732
|
|
|
2687
|
-
|
|
2688
|
-
|
|
2733
|
+
Review the critique carefully. Fix each identified issue in the code, then:
|
|
2734
|
+
1. Re-run verification commands to confirm the fix
|
|
2735
|
+
${options.noCommit ? "" : "2. Commit the fix with a descriptive message\n"}${options.noCommit ? "2" : "3"}. Signal completion with <task-verified> and <task-complete>
|
|
2736
|
+
|
|
2737
|
+
If the critique is about something outside your task scope, fix only what is within scope and signal completion.`,
|
|
2738
|
+
resumeSessionId: currentSessionId ?? void 0,
|
|
2689
2739
|
env: provider.getSpawnEnv()
|
|
2690
2740
|
},
|
|
2691
2741
|
{
|
|
@@ -2699,6 +2749,8 @@ Fix these issues, then verify${options.noCommit ? "" : ", commit your fix,"} and
|
|
|
2699
2749
|
provider
|
|
2700
2750
|
);
|
|
2701
2751
|
resumeSpinner?.succeed(`Fix attempt completed: ${task.name}`);
|
|
2752
|
+
if (resumeResult.sessionId) currentSessionId = resumeResult.sessionId;
|
|
2753
|
+
if (resumeResult.model) currentModel = resumeResult.model;
|
|
2702
2754
|
const fixResult = parseExecutionResult(resumeResult.stdout);
|
|
2703
2755
|
if (!fixResult.success) {
|
|
2704
2756
|
console.log(warning(`Generator could not fix issues after feedback: ${task.name}`));
|
|
@@ -2712,7 +2764,7 @@ Fix these issues, then verify${options.noCommit ? "" : ", commit your fix,"} and
|
|
|
2712
2764
|
break;
|
|
2713
2765
|
}
|
|
2714
2766
|
}
|
|
2715
|
-
evalResult = await runEvaluation(task,
|
|
2767
|
+
evalResult = await runEvaluation(task, currentModel, evalCheckScript, sprintId, provider);
|
|
2716
2768
|
}
|
|
2717
2769
|
await updateTask(
|
|
2718
2770
|
task.id,
|
|
@@ -3797,6 +3849,16 @@ function parseArgs3(args) {
|
|
|
3797
3849
|
throw new Error("Invalid model name \u2014 must be 1-100 alphanumeric characters, dots, hyphens, or underscores");
|
|
3798
3850
|
}
|
|
3799
3851
|
options.fallbackModel = modelStr;
|
|
3852
|
+
} else if (arg === "--max-turns") {
|
|
3853
|
+
const turnsStr = args[++i];
|
|
3854
|
+
if (!turnsStr) {
|
|
3855
|
+
throw new Error("--max-turns requires a number");
|
|
3856
|
+
}
|
|
3857
|
+
const turns = parseInt(turnsStr, 10);
|
|
3858
|
+
if (isNaN(turns) || turns <= 0) {
|
|
3859
|
+
throw new Error("--max-turns must be a positive integer");
|
|
3860
|
+
}
|
|
3861
|
+
options.maxTurns = turns;
|
|
3800
3862
|
} else if (arg === "--no-evaluate") {
|
|
3801
3863
|
options.noEvaluate = true;
|
|
3802
3864
|
} else if (!arg?.startsWith("-")) {
|