pi-goal-x 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -17
- package/extensions/goal-auditor.ts +68 -41
- package/extensions/goal-draft.ts +53 -0
- package/extensions/goal-policy.ts +147 -8
- package/extensions/goal-questionnaire.ts +7 -0
- package/extensions/goal-record.ts +44 -19
- package/extensions/goal-settings.ts +95 -0
- package/extensions/goal.ts +267 -59
- package/extensions/prompts/goal-prompts.ts +86 -17
- package/extensions/storage/goal-files.ts +9 -7
- package/extensions/widgets/goal-widget.ts +46 -14
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,10 +10,28 @@ The extension is designed around one rule: **the user owns intent; the agent exe
|
|
|
10
10
|
|
|
11
11
|
All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are preserved. The following changes are specific to pi-goal-x:
|
|
12
12
|
|
|
13
|
-
###
|
|
13
|
+
### Verification contract system
|
|
14
14
|
|
|
15
|
-
- **
|
|
16
|
-
- **Per-task
|
|
15
|
+
- **Per-goal verification contracts** — when drafting a goal, include a `Verification contract:` section with plain-text requirements (e.g. "Run npm test (0 failures), grep for remaining STP references"). The contract is extracted, stored on the goal record, and enforced by the `complete_goal` tool — the call is rejected unless the agent provides a non-empty `verificationSummary` matching the contract.
|
|
16
|
+
- **Per-task verification contracts** — `propose_task_list` supports an optional `verificationContract` per task. If set, `complete_task` requires a non-empty `verificationSummary`.
|
|
17
|
+
- **Both prompt and tool enforcement** — prompts include a VERIFICATION CONTRACT section instructing the agent; tool validators reject calls that violate the contract.
|
|
18
|
+
- **Backward compatible** — goals/tasks without a `Verification contract:` section work exactly as before. No contract = no enforcement.
|
|
19
|
+
- **Auditor integration** — the independent completion auditor receives both the `verificationContract` and `verificationSummary` and cross-checks claims against real artifacts.
|
|
20
|
+
- **`complete_goal` `testResults` removed** — replaced with `verificationSummary`. The old structured test results interface is gone.
|
|
21
|
+
|
|
22
|
+
### Unified goal + task acceptance
|
|
23
|
+
|
|
24
|
+
- **Single-dialog confirmation** — `propose_goal_draft` now accepts an optional `tasks` array parameter. The confirmation dialog shows the goal objective AND the proposed task list together in a single rich TUI view with box-drawing panel (`┌─ TASKS ───┐`), section headers, and hierarchical indentation for subtasks.
|
|
25
|
+
- **Atomic creation** — one confirmation (single enter press) creates the goal AND its task list together. No need for separate `propose_goal_draft` + `propose_task_list` calls.
|
|
26
|
+
- **Backward compatible** — existing separate `propose_task_list` flow continues to work unchanged. Goals without tasks work as before.
|
|
27
|
+
|
|
28
|
+
### Task list & sub-task system
|
|
29
|
+
|
|
30
|
+
- **Structured task breakdown** — the agent can propose a task list via `propose_task_list` (standalone) or `propose_goal_draft` with `tasks` (unified). Both show a Confirm / Continue Chatting dialog. Once confirmed, tasks are displayed in prompts and the widget, serialized to disk, and included in auditor review.
|
|
31
|
+
- **Recursive subtasks** — tasks can have nested sub-tasks via `subtasks?: GoalTask[]` (full recursive type). Subtask depth is controlled globally by `subtaskDepth` in `.pi/goal-settings.json` (default: 1 level). Too-deep subtrees are rejected at proposal.
|
|
32
|
+
- **Lightweight subtasks** — each task has an optional `lightweightSubtasks?: boolean` flag. When true, the parent can complete regardless of subtask status. When false/absent (full subtasks), all subtasks must be individually complete before the parent can close.
|
|
33
|
+
- **Per-task completion** — `complete_task` marks individual tasks done with optional evidence/verificationSummary, and `skip_task` marks tasks as skipped with a required reason. Neither stops the turn, so the agent can continue uninterrupted.
|
|
34
|
+
- **Hierarchical display** — task lists with subtasks render with indentation in prompts (`taskListBlock`, `goalPrompt`, `continuationPrompt`) and in the TUI widget (recursive count, BFS next-pending).
|
|
17
35
|
- **Optional `taskList`** — goals without a task list work exactly as before. The feature is entirely opt-in.
|
|
18
36
|
- **Soft `complete_goal` gate** — when `blockCompletion: true` is set, `complete_goal` surfaces a warning if pending tasks remain (prompt-level only; the agent can still complete).
|
|
19
37
|
|
|
@@ -30,7 +48,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
|
|
|
30
48
|
### E2e test infrastructure
|
|
31
49
|
|
|
32
50
|
- **Deterministic fork tests using `--mode json`**: the e2e suite spawns a real `pi --fork --mode json` session, parses structured `tool_execution_start`/`tool_execution_end` JSON events for field-level assertions — no free-text AI output parsing. Uses `--append-system-prompt` + `--tools` to force deterministic tool calls.
|
|
33
|
-
- **Full coverage**:
|
|
51
|
+
- **Full coverage**: 281 tests total — function-level integration tests, mock-pi handler tests, file-validity checks, real `pi --fork --mode json` E2E tests, propose_goal_tweak unit/integration/e2e tests, task list policy/round-trip/render tests (including subtasks), and verification contract tests.
|
|
34
52
|
|
|
35
53
|
### Completion auditor
|
|
36
54
|
|
|
@@ -165,11 +183,11 @@ The extension exposes tools only when they make sense for the current lifecycle
|
|
|
165
183
|
| `get_goal` | always | Read the focused goal state; mentions other open goals when present |
|
|
166
184
|
| `propose_goal_draft` | drafting only (goal creation) | Submit a concrete draft for user confirmation |
|
|
167
185
|
| `propose_goal_tweak` | tweak drafting only | Submit a revision to an existing goal (shows Confirm / Continue Chatting dialog) |
|
|
168
|
-
| `complete_goal` | focused active or paused goal | Mark the focused goal complete —
|
|
186
|
+
| `complete_goal` | focused active or paused goal | Mark the focused goal complete — supply a `verificationSummary` covering all contract items. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
|
|
169
187
|
| `pause_goal` | focused active goal | Pause the focused goal because of a real blocker |
|
|
170
188
|
| `abort_goal` | focused active or paused goal | Abort/archive an obsolete, impossible, unsafe, or user-cancelled focused goal |
|
|
171
189
|
| `propose_task_list` | active or paused goal | Propose a structured task list for user confirmation (stops the turn) |
|
|
172
|
-
| `complete_task` | active or paused goal | Mark a task complete with optional
|
|
190
|
+
| `complete_task` | active or paused goal | Mark a task complete with optional `verificationSummary`. If the task has a `verificationContract`, the summary is required (does not stop turn) |
|
|
173
191
|
| `skip_task` | active or paused goal | Mark a task skipped with a required reason (does not stop turn) |
|
|
174
192
|
| `propose_goal_tweak` | tweak drafting only | Submit a revision to the focused goal (shows Confirm / Continue Chatting dialog) |
|
|
175
193
|
| `step_complete` | hidden / legacy | Compatibility no-op; Sisyphus no longer requires a step counter |
|
|
@@ -205,17 +223,7 @@ Before archiving the goal, `update_goal` starts a separate pi agent in an isolat
|
|
|
205
223
|
|
|
206
224
|
The auditor is semantic, not a paperwork checklist: it should reject scaffold-only, alpha, generated-template, proxy-metric, build-only, or weakly verified completions when the real user outcome is not satisfied.
|
|
207
225
|
|
|
208
|
-
By default the auditor uses the current/default pi model. Configure it interactively with `/goal-settings`
|
|
209
|
-
|
|
210
|
-
```json
|
|
211
|
-
{
|
|
212
|
-
"provider": "fireworks",
|
|
213
|
-
"model": "accounts/fireworks/routers/kimi-k2p6-turbo",
|
|
214
|
-
"thinking_level": "high"
|
|
215
|
-
}
|
|
216
|
-
```
|
|
217
|
-
|
|
218
|
-
Environment variables `PI_GOAL_AUDITOR_PROVIDER`, `PI_GOAL_AUDITOR_MODEL`, and `PI_GOAL_AUDITOR_THINKING_LEVEL` take precedence over `/goal-settings`.
|
|
226
|
+
By default the auditor uses the current/default pi model. Configure it via `.pi/goal-auditor.json`, interactively with `/goal-settings` → `auditor`, or environment variables (see [Settings files](#settings-files)).
|
|
219
227
|
|
|
220
228
|
The completion result prints a full report into the conversation:
|
|
221
229
|
|
|
@@ -263,6 +271,52 @@ Before commands, tools, and lifecycle hooks act on a focused goal, the runtime r
|
|
|
263
271
|
|
|
264
272
|
Goal paths are constrained to `.pi/goals/` and `.pi/goals/archived/`; absolute paths, traversal, NUL bytes, symlinks, and unsafe metadata paths are rejected.
|
|
265
273
|
|
|
274
|
+
## Settings files
|
|
275
|
+
|
|
276
|
+
Configuration is split across two files under `.pi/`.
|
|
277
|
+
|
|
278
|
+
### `.pi/goal-settings.json`
|
|
279
|
+
|
|
280
|
+
Configured interactively via `/goal-settings`, or edited directly:
|
|
281
|
+
|
|
282
|
+
```json
|
|
283
|
+
{
|
|
284
|
+
"disableTasks": false,
|
|
285
|
+
"disableContracts": false,
|
|
286
|
+
"subtaskDepth": 1
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
| Field | Default | Purpose |
|
|
291
|
+
|---|---:|---|
|
|
292
|
+
| `disableTasks` | `false` | Suppress task list features entirely when `true` |
|
|
293
|
+
| `disableContracts` | `false` | Suppress verification contract enforcement when `true` |
|
|
294
|
+
| `subtaskDepth` | `1` | Maximum nesting depth for subtasks (`1` = tasks → subtasks, `2` = tasks → subtasks → sub-subtasks) |
|
|
295
|
+
|
|
296
|
+
**Env var overrides:** `PI_GOAL_DISABLE_TASKS=1` and `PI_GOAL_DISABLE_CONTRACTS=1` take precedence over the file. Set either variable to any truthy string to disable the corresponding feature.
|
|
297
|
+
|
|
298
|
+
### `.pi/goal-auditor.json`
|
|
299
|
+
|
|
300
|
+
Configured interactively via `/goal-settings` → `auditor`, or edited directly:
|
|
301
|
+
|
|
302
|
+
```json
|
|
303
|
+
{
|
|
304
|
+
"provider": "fireworks",
|
|
305
|
+
"model": "accounts/fireworks/models/deepseek-v4-flash",
|
|
306
|
+
"thinkingLevel": "high",
|
|
307
|
+
"disabled": false
|
|
308
|
+
}
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
| Field | Default | Purpose |
|
|
312
|
+
|---|---:|---|
|
|
313
|
+
| `provider` | system default | Provider name for the auditor agent (`anthropic`, `fireworks`, `google`, `groq`, etc.) |
|
|
314
|
+
| `model` | system default | Model name for the auditor agent |
|
|
315
|
+
| `thinkingLevel` | system default | Thinking level: `none`, `low`, `medium`, `high` |
|
|
316
|
+
| `disabled` | `false` | When `true`, skip the completion audit entirely |
|
|
317
|
+
|
|
318
|
+
**Env var overrides:** `PI_GOAL_AUDITOR_PROVIDER`, `PI_GOAL_AUDITOR_MODEL`, and `PI_GOAL_AUDITOR_THINKING_LEVEL` take precedence over file config. `PI_GOAL_AUDITOR_THINKING` is also accepted as an alias for the thinking level.
|
|
319
|
+
|
|
266
320
|
## Environment variables
|
|
267
321
|
|
|
268
322
|
| Variable | Default | Purpose |
|
|
@@ -13,7 +13,8 @@ import {
|
|
|
13
13
|
type ExtensionContext,
|
|
14
14
|
type ResourceLoader,
|
|
15
15
|
} from "@earendil-works/pi-coding-agent";
|
|
16
|
-
import type { GoalRecord, GoalTaskList } from "./goal-record.ts";
|
|
16
|
+
import type { GoalRecord, GoalTask, GoalTaskList } from "./goal-record.ts";
|
|
17
|
+
import type { GoalSettings } from "./goal-settings.ts";
|
|
17
18
|
|
|
18
19
|
export interface GoalAuditorConfig {
|
|
19
20
|
provider?: string;
|
|
@@ -127,29 +128,50 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
|
|
|
127
128
|
return { approved: approved && !disapproved, disapproved };
|
|
128
129
|
}
|
|
129
130
|
|
|
130
|
-
export interface
|
|
131
|
-
/**
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
131
|
+
export interface AuditorVerificationEvidence {
|
|
132
|
+
/** The agent's verification summary describing what was checked. */
|
|
133
|
+
summary: string;
|
|
134
|
+
/** The goal's verification contract (what the agent was required to verify), if any. */
|
|
135
|
+
contract?: string;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
function renderAuditorTaskTree(tasks: GoalTask[], indent: number): string[] {
|
|
139
|
+
const prefix = " ".repeat(indent);
|
|
140
|
+
const lines: string[] = [];
|
|
141
|
+
for (const task of tasks) {
|
|
142
|
+
const marker = task.status === "complete" ? "[x]" : task.status === "skipped" ? "[~]" : "[ ]";
|
|
143
|
+
lines.push(`${prefix}${marker} ${task.id}: ${task.title}`);
|
|
144
|
+
if (task.subtasks && task.subtasks.length > 0) {
|
|
145
|
+
lines.push(...renderAuditorTaskTree(task.subtasks, indent + 1));
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
return lines;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
function countAuditorTasks(tasks: GoalTask[]): { total: number; complete: number; skipped: number; pending: number } {
|
|
152
|
+
let total = 0;
|
|
153
|
+
let complete = 0;
|
|
154
|
+
let skipped = 0;
|
|
155
|
+
for (const t of tasks) {
|
|
156
|
+
total++;
|
|
157
|
+
if (t.status === "complete") complete++;
|
|
158
|
+
else if (t.status === "skipped") skipped++;
|
|
159
|
+
if (t.subtasks && t.subtasks.length > 0) {
|
|
160
|
+
const child = countAuditorTasks(t.subtasks);
|
|
161
|
+
total += child.total;
|
|
162
|
+
complete += child.complete;
|
|
163
|
+
skipped += child.skipped;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
return { total, complete, skipped, pending: total - complete - skipped };
|
|
139
167
|
}
|
|
140
168
|
|
|
141
169
|
function taskSummaryBlock(taskList?: GoalTaskList | null): string {
|
|
142
170
|
if (!taskList || taskList.tasks.length === 0) return "";
|
|
143
|
-
const total = taskList.tasks
|
|
144
|
-
const complete = taskList.tasks.filter((t) => t.status === "complete").length;
|
|
145
|
-
const skipped = taskList.tasks.filter((t) => t.status === "skipped").length;
|
|
146
|
-
const pending = taskList.tasks.filter((t) => t.status === "pending");
|
|
171
|
+
const { total, complete, skipped, pending } = countAuditorTasks(taskList.tasks);
|
|
147
172
|
const lines: string[] = [`Tasks: ${complete}/${total} complete${skipped > 0 ? `, ${skipped} skipped` : ""}`];
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
lines.push(` ${marker} ${task.id}: ${task.title}`);
|
|
151
|
-
}
|
|
152
|
-
const gate = taskList.blockCompletion && pending.length > 0 ? " | TASK GATE: pending tasks block completion" : "";
|
|
173
|
+
lines.push(...renderAuditorTaskTree(taskList.tasks, 0));
|
|
174
|
+
const gate = taskList.blockCompletion && pending > 0 ? " | TASK GATE: pending tasks block completion" : "";
|
|
153
175
|
lines[0] = lines[0]! + gate;
|
|
154
176
|
return lines.join("\n");
|
|
155
177
|
}
|
|
@@ -158,7 +180,8 @@ export function buildGoalAuditorPrompt(args: {
|
|
|
158
180
|
goal: GoalRecord;
|
|
159
181
|
completionSummary?: string | null;
|
|
160
182
|
detailedSummary: string;
|
|
161
|
-
|
|
183
|
+
verificationSummary?: string | null;
|
|
184
|
+
settings?: GoalSettings;
|
|
162
185
|
}): string {
|
|
163
186
|
return [
|
|
164
187
|
"You are the independent completion auditor for pi-goal.",
|
|
@@ -184,33 +207,36 @@ export function buildGoalAuditorPrompt(args: {
|
|
|
184
207
|
"Current goal metadata:",
|
|
185
208
|
"<goal_details>",
|
|
186
209
|
args.detailedSummary,
|
|
187
|
-
...(taskSummaryBlock(args.goal.taskList) ? ["", taskSummaryBlock(args.goal.taskList)] : []),
|
|
210
|
+
...(!args.settings?.disableTasks && taskSummaryBlock(args.goal.taskList) ? ["", taskSummaryBlock(args.goal.taskList)] : []),
|
|
188
211
|
"</goal_details>",
|
|
189
|
-
...(args.
|
|
212
|
+
...(args.verificationSummary?.trim() ? [
|
|
190
213
|
"",
|
|
191
|
-
"Executor
|
|
192
|
-
"<
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
"
|
|
214
|
+
"Executor verification summary:",
|
|
215
|
+
"<verification_summary>",
|
|
216
|
+
args.verificationSummary.trim(),
|
|
217
|
+
"</verification_summary>",
|
|
218
|
+
] : []),
|
|
219
|
+
...(!args.settings?.disableContracts && args.goal.verificationContract?.trim() ? [
|
|
220
|
+
"",
|
|
221
|
+
"Goal verification contract (what the executor was required to verify):",
|
|
222
|
+
"<verification_contract>",
|
|
223
|
+
args.goal.verificationContract.trim(),
|
|
224
|
+
"</verification_contract>",
|
|
199
225
|
] : []),
|
|
200
226
|
"",
|
|
201
227
|
"Audit checklist:",
|
|
202
|
-
...
|
|
203
|
-
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
204
|
-
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
205
|
-
"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
|
|
206
|
-
"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
207
|
-
"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
208
|
-
] : [
|
|
228
|
+
...[
|
|
209
229
|
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
210
230
|
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
231
|
+
...(args.verificationSummary?.trim()
|
|
232
|
+
? ["3. Check the <verification_summary> against real artifacts. If the executor claims to have run tests or searched for references, verify those claims with actual file/shell evidence. The summary is a claim, not proof — cross-check it."]
|
|
233
|
+
: []),
|
|
234
|
+
...(!args.settings?.disableContracts && args.goal.verificationContract?.trim()
|
|
235
|
+
? ["4. Verify that the executor has satisfied every item in the <verification_contract>. If any item is missing or weakly addressed, disapprove."]
|
|
236
|
+
: []),
|
|
237
|
+
"5. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
238
|
+
"6. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
239
|
+
],
|
|
214
240
|
"",
|
|
215
241
|
"Progress reporting:",
|
|
216
242
|
"You have the report_auditor_progress tool available to report your progress to the user.",
|
|
@@ -288,7 +314,8 @@ export async function runGoalCompletionAuditor(args: {
|
|
|
288
314
|
goal: GoalRecord;
|
|
289
315
|
completionSummary?: string | null;
|
|
290
316
|
detailedSummary: string;
|
|
291
|
-
|
|
317
|
+
verificationSummary?: string | null;
|
|
318
|
+
settings?: GoalSettings;
|
|
292
319
|
signal?: AbortSignal;
|
|
293
320
|
onProgress?: AuditorProgressCallback;
|
|
294
321
|
/**
|
package/extensions/goal-draft.ts
CHANGED
|
@@ -26,6 +26,57 @@ export function promptSafeObjective(objective: string): string {
|
|
|
26
26
|
return objective.replace(/<\/?untrusted_objective>/gi, (tag) => tag.replace(/</g, "&lt;").replace(/>/g, "&gt;"));
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
const VERIFICATION_CONTRACT_RE = /^Verification contract:\s*(.+)$/im;
|
|
30
|
+
|
|
31
|
+
const CONVENTIONAL_SECTION_NAMES = [
|
|
32
|
+
"success criteria",
|
|
33
|
+
"boundaries",
|
|
34
|
+
"constraints",
|
|
35
|
+
"if blocked",
|
|
36
|
+
"if blocked / unclear / failing",
|
|
37
|
+
"don'ts",
|
|
38
|
+
"sisyphus reminder",
|
|
39
|
+
"objective",
|
|
40
|
+
"目标",
|
|
41
|
+
"ordered steps",
|
|
42
|
+
"order rules",
|
|
43
|
+
"steps",
|
|
44
|
+
];
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Extract a `Verification contract:` section from a goal objective and return
|
|
48
|
+
* the cleaned objective (without the contract section) and the contract text.
|
|
49
|
+
*
|
|
50
|
+
* The contract section is a single line matching:
|
|
51
|
+
* Verification contract: <text>
|
|
52
|
+
*
|
|
53
|
+
* It can appear anywhere in the objective, but by convention it goes after
|
|
54
|
+
* the other sections (like Success criteria, Boundaries, Constraints).
|
|
55
|
+
*
|
|
56
|
+
* If no contract section is found, `verificationContract` is undefined.
|
|
57
|
+
*/
|
|
58
|
+
export function extractVerificationContract(objective: string): { objective: string; verificationContract?: string } {
|
|
59
|
+
const lines = objective.replace(/\r/g, "").split("\n");
|
|
60
|
+
let contract: string | undefined;
|
|
61
|
+
const filtered: string[] = [];
|
|
62
|
+
|
|
63
|
+
for (const line of lines) {
|
|
64
|
+
const trimmed = line.trim();
|
|
65
|
+
const m = VERIFICATION_CONTRACT_RE.exec(trimmed);
|
|
66
|
+
if (m) {
|
|
67
|
+
contract = m[1].trim();
|
|
68
|
+
// Skip this line — don't add it to the cleaned objective
|
|
69
|
+
} else {
|
|
70
|
+
filtered.push(line);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
return {
|
|
75
|
+
objective: filtered.join("\n"),
|
|
76
|
+
verificationContract: contract || undefined,
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
|
|
29
80
|
export function buildDraftConfirmationText(args: {
|
|
30
81
|
focus: GoalDraftingFocus;
|
|
31
82
|
originalTopic: string;
|
|
@@ -143,6 +194,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
|
|
|
143
194
|
"Success criteria: <observable evidence the goal is done>",
|
|
144
195
|
"Boundaries: <in scope / out of scope>",
|
|
145
196
|
"Constraints: <hard rules>",
|
|
197
|
+
"Verification contract: <optional — what verification evidence is required before marking complete, e.g. 'Run npm test (0 failures), grep for remaining references, re-read requirements and confirm every item is addressed'>",
|
|
146
198
|
"If blocked: <default = stop and ask the user>",
|
|
147
199
|
"Call propose_goal_draft with sisyphus=false and autoContinue=true unless the user asked otherwise.",
|
|
148
200
|
];
|
|
@@ -155,6 +207,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
|
|
|
155
207
|
"Success criteria: <observable evidence the whole ordered goal is done>",
|
|
156
208
|
"Boundaries: <in scope / out of scope>",
|
|
157
209
|
"Constraints: <hard rules, files not to touch, etc.>",
|
|
210
|
+
"Verification contract: <optional — what verification evidence is required before marking complete>",
|
|
158
211
|
"Ordered steps: <preserve the user's requested steps and ordering; do not add preflight or reconnaissance steps they did not ask for>",
|
|
159
212
|
"If blocked / unclear / failing: <default = stop and ask the user>",
|
|
160
213
|
"Sisyphus reminder: Work patiently and sequentially. No rushing, no unrequested preflight steps, no improvising around blockers.",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { statusLabel, type GoalDisplayRecordLike } from "./goal-core.ts";
|
|
2
|
-
import type { GoalTaskList, TaskStatus } from "./goal-record.ts";
|
|
2
|
+
import type { GoalTask, GoalTaskList, TaskStatus } from "./goal-record.ts";
|
|
3
3
|
|
|
4
4
|
export type GoalStatusLike = "active" | "paused" | "complete";
|
|
5
5
|
export type StopReasonLike = "user" | "agent";
|
|
@@ -126,10 +126,27 @@ export function abortGoalCommandMessage(args: { archived: boolean; wasDrafting:
|
|
|
126
126
|
return args.archived ? "Goal aborted and archived." : args.wasDrafting ? "Drafting cancelled." : "No goal is set.";
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
/** Count tasks in subtree recursively */
|
|
130
|
+
function countSubtreeTasks(tasks: GoalTask[]): { total: number; complete: number; skipped: number; pending: number } {
|
|
131
|
+
let total = 0;
|
|
132
|
+
let complete = 0;
|
|
133
|
+
let skipped = 0;
|
|
134
|
+
for (const t of tasks) {
|
|
135
|
+
total++;
|
|
136
|
+
if (t.status === "complete") complete++;
|
|
137
|
+
else if (t.status === "skipped") skipped++;
|
|
138
|
+
if (t.subtasks && t.subtasks.length > 0) {
|
|
139
|
+
const child = countSubtreeTasks(t.subtasks);
|
|
140
|
+
total += child.total;
|
|
141
|
+
complete += child.complete;
|
|
142
|
+
skipped += child.skipped;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
return { total, complete, skipped, pending: total - complete - skipped };
|
|
146
|
+
}
|
|
147
|
+
|
|
129
148
|
export function buildTaskSummary(taskList: GoalTaskList): string {
|
|
130
|
-
const total = taskList.tasks
|
|
131
|
-
const complete = taskList.tasks.filter((t) => t.status === "complete").length;
|
|
132
|
-
const skipped = taskList.tasks.filter((t) => t.status === "skipped").length;
|
|
149
|
+
const { total, complete, skipped } = countSubtreeTasks(taskList.tasks);
|
|
133
150
|
if (total === 0) return "No tasks";
|
|
134
151
|
const parts: string[] = [`${complete}/${total} tasks complete`];
|
|
135
152
|
if (skipped > 0) parts.push(`(${skipped} skipped)`);
|
|
@@ -138,9 +155,28 @@ export function buildTaskSummary(taskList: GoalTaskList): string {
|
|
|
138
155
|
|
|
139
156
|
export function taskCompletionBlockWarning(taskList: GoalTaskList): string | null {
|
|
140
157
|
if (!taskList.blockCompletion) return null;
|
|
141
|
-
const pending = taskList.tasks
|
|
142
|
-
if (pending
|
|
143
|
-
return `${pending
|
|
158
|
+
const { pending } = countSubtreeTasks(taskList.tasks);
|
|
159
|
+
if (pending === 0) return null;
|
|
160
|
+
return `${pending} task${pending > 1 ? "s" : ""} still pending with blockCompletion enabled. Complete or skip all pending tasks before finishing the goal.`;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Validate that a verificationSummary satisfies a verificationContract.
|
|
165
|
+
* If a contract exists, the summary must be non-empty.
|
|
166
|
+
*/
|
|
167
|
+
export function validateVerificationSummary(args: {
|
|
168
|
+
verificationContract?: string | null;
|
|
169
|
+
verificationSummary?: string | null;
|
|
170
|
+
}): PolicyValidation {
|
|
171
|
+
const contract = args.verificationContract?.trim();
|
|
172
|
+
const summary = args.verificationSummary?.trim();
|
|
173
|
+
if (contract && !summary) {
|
|
174
|
+
return {
|
|
175
|
+
ok: false,
|
|
176
|
+
message: `This goal has a verification contract but no verificationSummary was provided. Provide a verificationSummary that addresses the contract requirements.`,
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
return { ok: true };
|
|
144
180
|
}
|
|
145
181
|
|
|
146
182
|
export function validateTaskCompletion(args: {
|
|
@@ -171,9 +207,43 @@ export function validateTaskSkip(args: {
|
|
|
171
207
|
return { ok: true };
|
|
172
208
|
}
|
|
173
209
|
|
|
210
|
+
/**
|
|
211
|
+
* Count the maximum nesting depth of a task's subtask tree.
|
|
212
|
+
* Root level = 0. Returns the deepest nesting depth found.
|
|
213
|
+
*/
|
|
214
|
+
export function measureSubtaskDepth(task: GoalTask): number {
|
|
215
|
+
if (!task.subtasks || task.subtasks.length === 0) return 0;
|
|
216
|
+
let maxChild = 0;
|
|
217
|
+
for (const child of task.subtasks) {
|
|
218
|
+
const childDepth = measureSubtaskDepth(child);
|
|
219
|
+
if (childDepth > maxChild) maxChild = childDepth;
|
|
220
|
+
}
|
|
221
|
+
return maxChild + 1;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Validate that a task's subtask tree does not exceed the configured max depth.
|
|
226
|
+
* maxDepth is the subtaskDepth setting (default 1) — how many levels of nesting are allowed.
|
|
227
|
+
* Returns the first violation found, or undefined if valid.
|
|
228
|
+
*/
|
|
229
|
+
export function findSubtaskDepthViolation(tasks: GoalTask[], maxDepth: number): string | undefined {
|
|
230
|
+
for (const task of tasks) {
|
|
231
|
+
const depth = measureSubtaskDepth(task);
|
|
232
|
+
if (depth > maxDepth) {
|
|
233
|
+
return `Task "${task.id}" has subtask nesting depth ${depth}, exceeding the configured maximum of ${maxDepth}`;
|
|
234
|
+
}
|
|
235
|
+
if (task.subtasks) {
|
|
236
|
+
const childViolation = findSubtaskDepthViolation(task.subtasks, maxDepth);
|
|
237
|
+
if (childViolation) return childViolation;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return undefined;
|
|
241
|
+
}
|
|
242
|
+
|
|
174
243
|
export function validateTaskListProposal(args: {
|
|
175
244
|
goal: GoalPolicyRecordLike | null;
|
|
176
|
-
tasks:
|
|
245
|
+
tasks: GoalTask[];
|
|
246
|
+
maxSubtaskDepth?: number;
|
|
177
247
|
}): PolicyValidation {
|
|
178
248
|
if (!args.goal) return { ok: false, message: "No goal is set." };
|
|
179
249
|
if (args.tasks.length > 50) return { ok: false, message: "Task list cannot exceed 50 tasks." };
|
|
@@ -184,9 +254,78 @@ export function validateTaskListProposal(args: {
|
|
|
184
254
|
if (ids.has(t.id)) return { ok: false, message: `Duplicate task id: "${t.id}".` };
|
|
185
255
|
ids.add(t.id);
|
|
186
256
|
}
|
|
257
|
+
// Check subtask depth limit
|
|
258
|
+
const maxDepth = args.maxSubtaskDepth ?? 1;
|
|
259
|
+
const depthViolation = findSubtaskDepthViolation(args.tasks, maxDepth);
|
|
260
|
+
if (depthViolation) return { ok: false, message: depthViolation };
|
|
187
261
|
return { ok: true };
|
|
188
262
|
}
|
|
189
263
|
|
|
264
|
+
/**
|
|
265
|
+
* Recursively find a task by ID in a task tree.
|
|
266
|
+
*/
|
|
267
|
+
export function findTaskInTree(tasks: GoalTask[], taskId: string): GoalTask | undefined {
|
|
268
|
+
for (const t of tasks) {
|
|
269
|
+
if (t.id === taskId) return t;
|
|
270
|
+
if (t.subtasks) {
|
|
271
|
+
const found = findTaskInTree(t.subtasks, taskId);
|
|
272
|
+
if (found) return found;
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
return undefined;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Recursively update a task by ID in a task tree using an updater function.
|
|
280
|
+
*/
|
|
281
|
+
export function updateTaskInTree(tasks: GoalTask[], taskId: string, updater: (task: GoalTask) => GoalTask): GoalTask[] {
|
|
282
|
+
return tasks.map((t) => {
|
|
283
|
+
if (t.id === taskId) return updater(t);
|
|
284
|
+
if (t.subtasks) {
|
|
285
|
+
return { ...t, subtasks: updateTaskInTree(t.subtasks, taskId, updater) };
|
|
286
|
+
}
|
|
287
|
+
return t;
|
|
288
|
+
});
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* Check if all subtasks of a task are complete (for full subtasks only).
|
|
293
|
+
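* Returns undefined when the task has no subtasks, uses lightweight subtasks, or has no pending subtask at any checked depth; otherwise returns an error message naming the first pending subtask found.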
|
|
294
|
+
*/
|
|
295
|
+
export function checkSubtasksComplete(task: GoalTask): string | undefined {
|
|
296
|
+
if (!task.subtasks || task.subtasks.length === 0 || task.lightweightSubtasks) return undefined;
|
|
297
|
+
for (const child of task.subtasks) {
|
|
298
|
+
if (child.status === "pending") {
|
|
299
|
+
return `Task "${task.id}" has pending subtask "${child.id}". Complete or skip all subtasks first.`;
|
|
300
|
+
}
|
|
301
|
+
// Check recursively
|
|
302
|
+
const childCheck = checkSubtasksComplete(child);
|
|
303
|
+
if (childCheck) return childCheck;
|
|
304
|
+
}
|
|
305
|
+
return undefined;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* Recursively skip all subtasks of a task.
|
|
310
|
+
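* Returns a new task object whose non-complete subtasks are marked skipped, recursing into each skipped subtask; completed subtasks are kept as-is, and a task without subtasks is returned unchanged.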
|
|
311
|
+
*/
|
|
312
|
+
export function skipAllSubtasks(task: GoalTask, now: string, reason: string): GoalTask {
|
|
313
|
+
if (!task.subtasks || task.subtasks.length === 0) return task;
|
|
314
|
+
return {
|
|
315
|
+
...task,
|
|
316
|
+
subtasks: task.subtasks.map((child) => {
|
|
317
|
+
if (child.status === "complete") return child;
|
|
318
|
+
const skipped = {
|
|
319
|
+
...child,
|
|
320
|
+
status: "skipped" as const,
|
|
321
|
+
skippedAt: now,
|
|
322
|
+
skipReason: reason,
|
|
323
|
+
};
|
|
324
|
+
return skipAllSubtasks(skipped, now, reason);
|
|
325
|
+
}),
|
|
326
|
+
};
|
|
327
|
+
}
|
|
328
|
+
|
|
190
329
|
export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null; taskSummary?: string | null }): string {
|
|
191
330
|
const auditSkipped = args.auditSkippedReason?.trim();
|
|
192
331
|
const auditorReport = args.auditorReport?.trim();
|
|
@@ -92,6 +92,11 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
|
|
|
92
92
|
const totalTabs = questions.length + 1;
|
|
93
93
|
|
|
94
94
|
return await ctx.ui.custom<GoalQuestionnaireResult>((tui, theme, _kb, done) => {
|
|
95
|
+
// Suppress hardware cursor during dialog to reduce TUI auto-scroll
|
|
96
|
+
// (the TUI render loop runs at ~60fps and writes ANSI cursor positioning
|
|
97
|
+
// sequences every cycle, which can cause terminal viewport snapping).
|
|
98
|
+
const wasHardwareCursorShown = tui.getShowHardwareCursor();
|
|
99
|
+
tui.setShowHardwareCursor(false);
|
|
95
100
|
let currentTab = 0;
|
|
96
101
|
let optionIndex = 0;
|
|
97
102
|
let inputMode = false;
|
|
@@ -118,6 +123,8 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
|
|
|
118
123
|
}
|
|
119
124
|
|
|
120
125
|
function submit(cancelled: boolean) {
|
|
126
|
+
// Restore hardware cursor now that the dialog is closing
|
|
127
|
+
tui.setShowHardwareCursor(wasHardwareCursorShown);
|
|
121
128
|
const ordered = questions.map((q) => answers.get(q.id)).filter((a): a is GoalQuestionnaireAnswer => !!a);
|
|
122
129
|
done({ questions, answers: ordered, cancelled });
|
|
123
130
|
}
|