pi-goal-x 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,22 @@ The extension is designed around one rule: **the user owns intent; the agent exe
10
10
 
11
11
  All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are preserved. The following changes are specific to pi-goal-x:
12
12
 
13
+ ### Verification contract system
14
+
15
+ - **Per-goal verification contracts** — when drafting a goal, include a `Verification contract:` section with plain-text requirements (e.g. "Run npm test (0 failures), grep for remaining STP references"). The contract is extracted, stored on the goal record, and enforced by the `complete_goal` tool — the call is rejected unless the agent provides a non-empty `verificationSummary` matching the contract.
16
+ - **Per-task verification contracts** — `propose_task_list` supports an optional `verificationContract` per task. If set, `complete_task` requires a non-empty `verificationSummary`.
17
+ - **Both prompt and tool enforcement** — prompts include a VERIFICATION CONTRACT section instructing the agent; tool validators reject calls that violate the contract.
18
+ - **Backward compatible** — goals/tasks without a `Verification contract:` section work exactly as before. No contract = no enforcement.
19
+ - **Auditor integration** — the independent completion auditor receives both the `verificationContract` and `verificationSummary` and cross-checks claims against real artifacts.
20
+ - **`complete_goal` `testResults` removed** — replaced with `verificationSummary`. The old structured test results interface is gone.
21
+
22
+ ### Task list system
23
+
24
+ - **Structured task breakdown** — the agent can propose a task list via `propose_task_list`, which shows the user a Confirm / Continue Chatting dialog (mirrors the `propose_goal_draft` pattern). Once confirmed, tasks are displayed in prompts, the widget, serialized to disk, and included in auditor review.
25
+ - **Per-task completion** — `complete_task` marks individual tasks done with optional evidence, and `skip_task` marks tasks as skipped with a required reason. Neither stops the turn, so the agent can continue uninterrupted.
26
+ - **Optional `taskList`** — goals without a task list work exactly as before. The feature is entirely opt-in.
27
+ - **Soft `complete_goal` gate** — when `blockCompletion: true` is set, `complete_goal` surfaces a warning if pending tasks remain (prompt-level only; the agent can still complete).
28
+
13
29
  ### Goal objective is immutable
14
30
 
15
31
  - The goal objective is immutable — the agent **must not** modify it autonomously. Objective changes are only possible through `propose_goal_tweak`, which presents the user with a Confirm / Continue Chatting dialog matching the `propose_goal_draft` confirmation pattern. This prevents the agent from silently changing the goal contract.
@@ -23,7 +39,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
23
39
  ### E2e test infrastructure
24
40
 
25
41
  - **Deterministic fork tests using `--mode json`**: the e2e suite spawns a real `pi --fork --mode json` session, parses structured `tool_execution_start`/`tool_execution_end` JSON events for field-level assertions — no free-text AI output parsing. Uses `--append-system-prompt` + `--tools` to force deterministic tool calls.
26
- - **Full coverage**: 143 tests total — function-level integration tests (12), mock-pi handler tests (4), file-validity checks (6), real `pi --fork --mode json` tests (3 scenarios: quick-sync, combined sync+complete, deferred archival), and propose_goal_tweak unit/integration/e2e tests (15).
42
+ - **Full coverage**: 205 tests total — function-level integration tests (12), mock-pi handler tests (4), file-validity checks (6), real `pi --fork --mode json` tests (3 scenarios), propose_goal_tweak unit/integration/e2e tests (15), task list policy/round-trip/render tests (50+), and verification contract tests (14).
27
43
 
28
44
  ### Completion auditor
29
45
 
@@ -158,9 +174,12 @@ The extension exposes tools only when they make sense for the current lifecycle
158
174
  | `get_goal` | always | Read the focused goal state; mentions other open goals when present |
159
175
  | `propose_goal_draft` | drafting only (goal creation) | Submit a concrete draft for user confirmation |
160
176
  | `propose_goal_tweak` | tweak drafting only | Submit a revision to an existing goal (shows Confirm / Continue Chatting dialog) |
161
- | `update_goal` | focused active or paused goal | Mark the focused goal complete when all requirements are satisfied. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
177
+ | `complete_goal` | focused active or paused goal | Mark the focused goal complete; supply a `verificationSummary` covering all contract items. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
162
178
  | `pause_goal` | focused active goal | Pause the focused goal because of a real blocker |
163
179
  | `abort_goal` | focused active or paused goal | Abort/archive an obsolete, impossible, unsafe, or user-cancelled focused goal |
180
+ | `propose_task_list` | active or paused goal | Propose a structured task list for user confirmation (stops the turn) |
181
+ | `complete_task` | active or paused goal | Mark a task complete with optional `verificationSummary`. If the task has a `verificationContract`, the summary is required (does not stop turn) |
182
+ | `skip_task` | active or paused goal | Mark a task skipped with a required reason (does not stop turn) |
164
183
  | `propose_goal_tweak` | tweak drafting only | Submit a revision to the focused goal (shows Confirm / Continue Chatting dialog) |
165
184
  | `step_complete` | hidden / legacy | Compatibility no-op; Sisyphus no longer requires a step counter |
166
185
  | `create_goal` | hidden | Direct calls are rejected; normal creation goes through `propose_goal_draft` |
@@ -228,7 +247,7 @@ The shipped gates are intentionally small and mechanical.
228
247
  | Completion auditor gate | Archiving completion unless an independent pi auditor agent returns `<approved/>` |
229
248
  | Abort gate | Aborting missing, stale, completed, or reasonless goals |
230
249
  | Direct-create rejection | Hidden `create_goal` calls creating goals without the confirmation flow |
231
- | Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `update_goal`, or `propose_goal_tweak` stops the turn |
250
+ | Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `complete_goal`, or `propose_goal_tweak` stops the turn |
232
251
  | Empty-turn guard | Pure chat loops that would keep auto-continuing without meaningful goal work |
233
252
  | Abort pause | Active goals staying active after user abort / Ctrl-C |
234
253
  | Disk reconciliation | External pause/archive/delete/status changes being ignored or overwritten by stale memory |
@@ -28,7 +28,7 @@
28
28
  -> runtime 重新计算 prompt 与 tool surface
29
29
  -> 执行 agent 按 focused goal 工作
30
30
  -> tool call / turn event 更新 accounting 与 ledger
31
- -> 执行 agent 调用 update_goal 请求完成
31
+ -> 执行 agent 调用 complete_goal 请求完成
32
32
  -> 独立 auditor agent 检查完成声明
33
33
  -> 只有 auditor approval 才归档为 complete
34
34
  ```
@@ -43,7 +43,7 @@
43
43
  -> 用户确认
44
44
  -> 写入 active goal 文件并设置 focus
45
45
  -> agent 跨一个或多个 turn 执行工作
46
- -> agent 调用 update_goal(status="complete")
46
+ -> agent 调用 complete_goal(status="complete")
47
47
  -> 对话中出现 Goal audit started
48
48
  -> auditor session 检查真实产物
49
49
  -> 对话中出现 Goal audit approved
@@ -200,7 +200,7 @@ interface GoalConfirmationIntent {
200
200
  | `goal_question` / `goal_questionnaire` | goal confirmation / tweak drafting 中的结构化用户对话。 |
201
201
  | `propose_goal_draft` | 提交 goal 草案给用户确认;没有 confirmation intent 时会被 validator 拒绝。 |
202
202
  | `apply_goal_tweak` | 提交并应用 goal 修改。 |
203
- | `update_goal` | 请求完成目标,并触发独立审计。 |
203
+ | `complete_goal` | 请求完成目标,并触发独立审计。 |
204
204
  | `pause_goal` | agent 因真实 blocker 暂停目标。 |
205
205
  | `abort_goal` | agent 因目标废弃、不可行、不安全等原因中止目标。 |
206
206
  | `step_complete` | 隐藏的 legacy no-op;Sisyphus 不再使用 step counter。 |
@@ -267,7 +267,7 @@ completion 不信任执行 agent 单方声明,而是一个双 agent 协议。
267
267
  }
268
268
  ```
269
269
 
270
- `update_goal` 会先校验 focused goal 是否可以完成,然后写入 `completion_requested` ledger event。
270
+ `complete_goal` 会先校验 focused goal 是否可以完成,然后写入 `completion_requested` ledger event。
271
271
 
272
272
  ### 9.2 对话中出现 audit started
273
273
 
@@ -281,7 +281,7 @@ Auditor model: ...
281
281
  Completion claim: ...
282
282
  ```
283
283
 
284
- 这让 audit 成为 transcript 里一个明确的 agentic 阶段,而不是隐藏在 `update_goal` tool result 里。
284
+ 这让 audit 成为 transcript 里一个明确的 agentic 阶段,而不是隐藏在 `complete_goal` tool result 里。
285
285
 
286
286
  ### 9.3 独立 auditor session
287
287
 
@@ -343,7 +343,7 @@ Audit Report 或 rejection reason
343
343
 
344
344
  agent 可以在真实 blocker 下调用 `pause_goal`。用户也可以用 `/goal-pause` 或 abort active run 来暂停目标。
345
345
 
346
- `pause_goal`、`abort_goal`、`update_goal`、`apply_goal_tweak` 成功后,会设置 `turnStoppedFor`。之后同一个 turn 里,`tool_call` hook 会阻止额外的非允许工具调用。这个 hard gate 仍然保留:生命周期已经 stop 后,agent 应该总结并交还控制,而不是继续修改文件。
346
+ `pause_goal`、`abort_goal`、`complete_goal`、`apply_goal_tweak` 成功后,会设置 `turnStoppedFor`。之后同一个 turn 里,`tool_call` hook 会阻止额外的非允许工具调用。这个 hard gate 仍然保留:生命周期已经 stop 后,agent 应该总结并交还控制,而不是继续修改文件。
347
347
 
348
348
  pause 与 abort 的区别:
349
349
 
@@ -391,7 +391,7 @@ Execution runtime
391
391
  v
392
392
  Executor agent
393
393
  |-- 正常 read/write/bash/edit 工作
394
- |-- pause_goal / abort_goal / update_goal
394
+ |-- pause_goal / abort_goal / complete_goal
395
395
  v
396
396
  Completion request
397
397
  |-- 对话中出现 Goal audit started
@@ -131,7 +131,7 @@ The following behaviors remain runtime-enforced:
131
131
  4. **Mode consistency.** A draft proposal cannot silently change `/goals` into Sisyphus or `/sisyphus` into a regular goal.
132
132
  5. **Stale continuation protection.** A queued continuation for an old goal cannot perform work for a different current goal.
133
133
  6. **Human-owned focus.** The agent cannot silently switch focus between open goals.
134
- 7. **Completion audit.** `update_goal(status="complete")` archives only if the independent auditor returns exactly one approving marker.
134
+ 7. **Completion audit.** `complete_goal(status="complete")` archives only if the independent auditor returns exactly one approving marker.
135
135
  8. **Path safety.** Goal files and archives must remain under expected `.pi/goals` paths.
136
136
  9. **Post-stop transaction boundary.** After pause, abort, approved completion, or applied tweak, the same turn should not continue substantive work.
137
137
  10. **No hard cost control/cap lifecycle.** Resource-control is outside this runtime; auto-continue uses semantic stop conditions and the empty-turn guard.
@@ -211,7 +211,7 @@ The runtime keeps tools for irreversible transitions:
211
211
 
212
212
  - `propose_goal_draft`
213
213
  - `get_goal`
214
- - `update_goal`
214
+ - `complete_goal`
215
215
  - `pause_goal`
216
216
  - `abort_goal`
217
217
  - `apply_goal_tweak`
@@ -193,7 +193,7 @@ Continuation prompts include a goal id so stale prompts can be detected and neut
193
193
 
194
194
  ## Completion output
195
195
 
196
- Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `update_goal(status="complete")` is valid for active and paused goals; paused goals do not need to be resumed just to record completion when existing evidence is sufficient.
196
+ Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `complete_goal(status="complete")` is valid for active and paused goals; paused goals do not need to be resumed just to record completion when existing evidence is sufficient.
197
197
 
198
198
  Before archiving, the tool starts a separate in-memory pi session with a focused auditor prompt. The auditor receives the objective, executor completion summary, and goal metadata, can inspect the workspace with `read`, `grep`, `find`, `ls`, and `bash`, and must end with exactly one marker:
199
199
 
@@ -13,7 +13,7 @@ import {
13
13
  type ExtensionContext,
14
14
  type ResourceLoader,
15
15
  } from "@earendil-works/pi-coding-agent";
16
- import type { GoalRecord } from "./goal-record.ts";
16
+ import type { GoalRecord, GoalTaskList } from "./goal-record.ts";
17
17
 
18
18
  export interface GoalAuditorConfig {
19
19
  provider?: string;
@@ -127,22 +127,34 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
127
127
  return { approved: approved && !disapproved, disapproved };
128
128
  }
129
129
 
130
- export interface AuditorTestResults {
131
- /** Exit code of the test run (0 = success) */
132
- exitCode: number;
133
- /** Test suite name, e.g. 'npm test' */
134
- suiteName?: string;
135
- /** Last lines of test output showing results */
136
- output?: string;
137
- /** ISO timestamp of when tests were run */
138
- timestamp?: string;
130
/**
 * Verification material attached to a completion claim: the agent's own
 * account of what it checked, plus the contract it was checked against.
 */
export interface AuditorVerificationEvidence {
  /** Verification contract of the goal (what the agent was required to verify), when one exists. */
  contract?: string;
  /** Agent-written summary describing the verification that was performed. */
  summary: string;
}
136
+
137
/**
 * Render a goal's task list as a plain-text block.
 *
 * The first line is a header `Tasks: <complete>/<total> complete`, extended
 * with `, <n> skipped` when at least one task is skipped and with a
 * `TASK GATE` notice when `blockCompletion` is set while tasks are pending.
 * Every following line lists one task with a marker: `[x]` for complete,
 * `[~]` for skipped and `[ ]` for any other status.
 *
 * @param taskList - Task list to render; may be `undefined` or `null`.
 * @returns The rendered block, or `""` when the list is absent or has no tasks.
 */
function taskSummaryBlock(taskList?: GoalTaskList | null): string {
  if (!taskList || taskList.tasks.length === 0) return "";

  // Tally statuses and build the per-task lines in a single pass.
  let completeCount = 0;
  let skippedCount = 0;
  let pendingCount = 0;
  const taskLines: string[] = [];
  for (const task of taskList.tasks) {
    let marker = "[ ]";
    if (task.status === "complete") {
      completeCount += 1;
      marker = "[x]";
    } else if (task.status === "skipped") {
      skippedCount += 1;
      marker = "[~]";
    } else if (task.status === "pending") {
      pendingCount += 1;
    }
    taskLines.push(` ${marker} ${task.id}: ${task.title}`);
  }

  // Build the header in one go so no element has to be patched (or asserted
  // non-null) after the array is assembled.
  const skippedNote = skippedCount > 0 ? `, ${skippedCount} skipped` : "";
  const gateNote = taskList.blockCompletion && pendingCount > 0 ? " | TASK GATE: pending tasks block completion" : "";
  const header = `Tasks: ${completeCount}/${taskList.tasks.length} complete${skippedNote}${gateNote}`;

  return [header, ...taskLines].join("\n");
}
140
152
 
141
153
  export function buildGoalAuditorPrompt(args: {
142
154
  goal: GoalRecord;
143
155
  completionSummary?: string | null;
144
156
  detailedSummary: string;
145
- testResults?: AuditorTestResults | null;
157
+ verificationSummary?: string | null;
146
158
  }): string {
147
159
  return [
148
160
  "You are the independent completion auditor for pi-goal.",
@@ -168,32 +180,36 @@ export function buildGoalAuditorPrompt(args: {
168
180
  "Current goal metadata:",
169
181
  "<goal_details>",
170
182
  args.detailedSummary,
183
+ ...(taskSummaryBlock(args.goal.taskList) ? ["", taskSummaryBlock(args.goal.taskList)] : []),
171
184
  "</goal_details>",
172
- ...(args.testResults ? [
185
+ ...(args.verificationSummary?.trim() ? [
173
186
  "",
174
- "Executor test evidence:",
175
- "<test_evidence>",
176
- ` Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
177
- ` Exit code: ${args.testResults.exitCode}`,
178
- ` Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
179
- ` Output:`,
180
- ...(args.testResults.output ? args.testResults.output.split("\n").map((l) => ` ${l}`) : [" (none provided)"]),
181
- "</test_evidence>",
187
+ "Executor verification summary:",
188
+ "<verification_summary>",
189
+ args.verificationSummary.trim(),
190
+ "</verification_summary>",
191
+ ] : []),
192
+ ...(args.goal.verificationContract?.trim() ? [
193
+ "",
194
+ "Goal verification contract (what the executor was required to verify):",
195
+ "<verification_contract>",
196
+ args.goal.verificationContract.trim(),
197
+ "</verification_contract>",
182
198
  ] : []),
183
199
  "",
184
200
  "Audit checklist:",
185
- ...(args.testResults ? [
186
- "1. Extract the real success criteria from the objective, including quality/reader outcomes.",
187
- "2. Inspect artifacts or command output that can prove or disprove those criteria.",
188
- "3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
189
- "4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
190
- "5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
191
- ] : [
201
+ ...[
192
202
  "1. Extract the real success criteria from the objective, including quality/reader outcomes.",
193
203
  "2. Inspect artifacts or command output that can prove or disprove those criteria.",
194
- "3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
195
- "4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
196
- ]),
204
+ ...(args.verificationSummary?.trim()
205
+ ? ["3. Check the <verification_summary> against real artifacts. If the executor claims to have run tests or searched for references, verify those claims with actual file/shell evidence. The summary is a claim, not proof — cross-check it."]
206
+ : []),
207
+ ...(args.goal.verificationContract?.trim()
208
+ ? ["4. Verify that the executor has satisfied every item in the <verification_contract>. If any item is missing or weakly addressed, disapprove."]
209
+ : []),
210
+ "5. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
211
+ "6. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
212
+ ],
197
213
  "",
198
214
  "Progress reporting:",
199
215
  "You have the report_auditor_progress tool available to report your progress to the user.",
@@ -271,7 +287,7 @@ export async function runGoalCompletionAuditor(args: {
271
287
  goal: GoalRecord;
272
288
  completionSummary?: string | null;
273
289
  detailedSummary: string;
274
- testResults?: AuditorTestResults | null;
290
+ verificationSummary?: string | null;
275
291
  signal?: AbortSignal;
276
292
  onProgress?: AuditorProgressCallback;
277
293
  /**
@@ -46,6 +46,15 @@ export function buildGoalCompactSummary(goal: GoalRecord, events: GoalLedgerEven
46
46
  case "goal_completed":
47
47
  lines.push(" - completed");
48
48
  break;
49
+ case "task_list_set":
50
+ lines.push(` - task list set: ${event.taskCount} tasks${event.blockCompletion ? " (blocking)" : ""}`);
51
+ break;
52
+ case "task_complete":
53
+ lines.push(` - task complete: ${event.taskId}${event.evidence ? ` — ${truncateText(event.evidence, 60)}` : ""}`);
54
+ break;
55
+ case "task_skipped":
56
+ lines.push(` - task skipped: ${event.taskId} — ${truncateText(event.reason, 60)}`);
57
+ break;
49
58
  case "goal_aborted":
50
59
  lines.push(` - aborted: ${event.reason}`);
51
60
  break;
@@ -26,6 +26,57 @@ export function promptSafeObjective(objective: string): string {
26
26
  return objective.replace(/<\/?untrusted_objective>/gi, (tag) => tag.replace(/</g, "&lt;").replace(/>/g, "&gt;"));
27
27
  }
28
28
 
29
// Matches a single `Verification contract: <text>` line (case-insensitive) and
// captures the text after the colon.
const VERIFICATION_CONTRACT_RE = /^Verification contract:\s*(.+)$/im;

// Section names conventionally used in goal objectives.
// NOTE(review): not referenced by the code visible here — presumably used
// elsewhere in this module; confirm before removing.
const CONVENTIONAL_SECTION_NAMES = [
  "success criteria",
  "boundaries",
  "constraints",
  "if blocked",
  "if blocked / unclear / failing",
  "don'ts",
  "sisyphus reminder",
  "objective",
  "目标",
  "ordered steps",
  "order rules",
  "steps",
];

/**
 * Extract `Verification contract:` lines from a goal objective and return the
 * cleaned objective (without those lines) together with the contract text.
 *
 * A contract line is a single line of the form:
 *   Verification contract: <text>
 *
 * It can appear anywhere in the objective, but by convention it goes after
 * the other sections (like Success criteria, Boundaries, Constraints).
 *
 * Every matching line is removed from the objective. When several contract
 * lines are present, their texts are all kept, joined with newlines, so no
 * requirement is silently lost. Carriage returns are stripped from the
 * objective before it is split into lines.
 *
 * @param objective - Raw goal objective text.
 * @returns The objective without contract lines, and the contract text
 *   (`undefined` when no contract line is found).
 */
export function extractVerificationContract(objective: string): { objective: string; verificationContract?: string } {
  const contractParts: string[] = [];
  const keptLines: string[] = [];

  for (const line of objective.replace(/\r/g, "").split("\n")) {
    // `exec` returns null on no match; optional chaining avoids indexing a
    // possibly-missing capture group.
    const captured = VERIFICATION_CONTRACT_RE.exec(line.trim())?.[1]?.trim();
    if (captured === undefined) {
      keptLines.push(line);
      continue;
    }
    // A matched line is dropped from the objective; only non-empty text is
    // recorded as contract content.
    if (captured) contractParts.push(captured);
  }

  return {
    objective: keptLines.join("\n"),
    verificationContract: contractParts.length > 0 ? contractParts.join("\n") : undefined,
  };
}
79
+
29
80
  export function buildDraftConfirmationText(args: {
30
81
  focus: GoalDraftingFocus;
31
82
  originalTopic: string;
@@ -131,6 +182,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
131
182
  "- If the topic is already concrete, you may proceed directly to propose_goal_draft.",
132
183
  "- The goal contract should make the objective, success criteria, boundaries, constraints, and blocker rule explicit.",
133
184
  "- Keep grilling assumptions until the objective, success criteria, boundaries, constraints, and blocker rule are clear enough to confirm.",
185
+ "- After a goal is confirmed, you may call propose_task_list on the first continuation turn if the objective naturally decomposes into trackable milestones. Do not add a task list for simple, single-step goals.",
134
186
  "- propose_goal_draft opens the user's Confirm / Continue Chatting dialog. Confirm creates and focuses the goal; Continue Chatting means keep refining through normal proposal cycles.",
135
187
  "- create_goal is not a shortcut. Direct create_goal calls are rejected so the user keeps explicit say in goal creation.",
136
188
  ];
@@ -142,6 +194,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
142
194
  "Success criteria: <observable evidence the goal is done>",
143
195
  "Boundaries: <in scope / out of scope>",
144
196
  "Constraints: <hard rules>",
197
+ "Verification contract: <optional — what verification evidence is required before marking complete, e.g. 'Run npm test (0 failures), grep for remaining references, re-read requirements and confirm every item is addressed'>",
145
198
  "If blocked: <default = stop and ask the user>",
146
199
  "Call propose_goal_draft with sisyphus=false and autoContinue=true unless the user asked otherwise.",
147
200
  ];
@@ -154,6 +207,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
154
207
  "Success criteria: <observable evidence the whole ordered goal is done>",
155
208
  "Boundaries: <in scope / out of scope>",
156
209
  "Constraints: <hard rules, files not to touch, etc.>",
210
+ "Verification contract: <optional — what verification evidence is required before marking complete>",
157
211
  "Ordered steps: <preserve the user's requested steps and ordering; do not add preflight or reconnaissance steps they did not ask for>",
158
212
  "If blocked / unclear / failing: <default = stop and ask the user>",
159
213
  "Sisyphus reminder: Work patiently and sequentially. No rushing, no unrequested preflight steps, no improvising around blockers.",
@@ -16,7 +16,10 @@ export type GoalLedgerEvent =
16
16
  | { type: "audit_result"; goalId: string; verdict: "approved" | "disapproved" | "error"; report: string; at: string }
17
17
  | { type: "audit_skipped"; goalId: string; reason: "disabled" | "user_aborted"; provider?: string; model?: string; thinkingLevel?: string; at: string }
18
18
  | { type: "goal_completed"; goalId: string; archivePath?: string; at: string }
19
- | { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string };
19
+ | { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string }
20
+ | { type: "task_list_set"; goalId: string; taskCount: number; blockCompletion: boolean; at: string }
21
+ | { type: "task_complete"; goalId: string; taskId: string; evidence?: string; at: string }
22
+ | { type: "task_skipped"; goalId: string; taskId: string; reason: string; at: string };
20
23
 
21
24
  export interface GoalLedgerContext {
22
25
  cwd: string;
@@ -147,6 +150,12 @@ function isValidLedgerEvent(value: unknown): value is GoalLedgerEvent {
147
150
  return typeof obj.goalId === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
148
151
  case "goal_aborted":
149
152
  return typeof obj.goalId === "string" && typeof obj.reason === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
153
+ case "task_list_set":
154
+ return typeof obj.goalId === "string" && typeof obj.taskCount === "number" && typeof obj.blockCompletion === "boolean";
155
+ case "task_complete":
156
+ return typeof obj.goalId === "string" && typeof obj.taskId === "string" && (obj.evidence === undefined || typeof obj.evidence === "string");
157
+ case "task_skipped":
158
+ return typeof obj.goalId === "string" && typeof obj.taskId === "string" && typeof obj.reason === "string";
150
159
  default:
151
160
  return false;
152
161
  }
@@ -176,6 +185,12 @@ function sanitizeEvent(event: GoalLedgerEvent): GoalLedgerEvent {
176
185
  return { ...event, goalId: safeGoalId(event.goalId) };
177
186
  case "goal_aborted":
178
187
  return { ...event, goalId: safeGoalId(event.goalId) };
188
+ case "task_list_set":
189
+ return { ...event, goalId: safeGoalId(event.goalId) };
190
+ case "task_complete":
191
+ return { ...event, goalId: safeGoalId(event.goalId) };
192
+ case "task_skipped":
193
+ return { ...event, goalId: safeGoalId(event.goalId) };
179
194
  case "goal_unfocused":
180
195
  return event;
181
196
  }
@@ -1,4 +1,5 @@
1
1
  import { statusLabel, type GoalDisplayRecordLike } from "./goal-core.ts";
2
+ import type { GoalTaskList, TaskStatus } from "./goal-record.ts";
2
3
 
3
4
  export type GoalStatusLike = "active" | "paused" | "complete";
4
5
  export type StopReasonLike = "user" | "agent";
@@ -9,6 +10,7 @@ export interface GoalPolicyRecordLike extends GoalDisplayRecordLike {
9
10
  updatedAt?: string;
10
11
  pauseReason?: string;
11
12
  pauseSuggestedAction?: string;
13
+ taskList?: GoalTaskList;
12
14
  }
13
15
 
14
16
  export type PolicyValidation =
@@ -39,7 +41,7 @@ export function validateGoalCompletion(args: {
39
41
  const { goal, runningGoalId } = args;
40
42
  if (!goal) return { ok: false, message: "No goal is set." };
41
43
  if (runningGoalId && goal.id !== runningGoalId) return { ok: false, message: "The active goal changed during this run; not marking it complete." };
42
- if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)}; update_goal does not apply.` };
44
+ if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)}; complete_goal does not apply.` };
43
45
  return { ok: true };
44
46
  }
45
47
 
@@ -124,7 +126,87 @@ export function abortGoalCommandMessage(args: { archived: boolean; wasDrafting:
124
126
  return args.archived ? "Goal aborted and archived." : args.wasDrafting ? "Drafting cancelled." : "No goal is set.";
125
127
  }
126
128
 
127
- export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null }): string {
129
/**
 * Produce a one-line progress summary for a task list.
 *
 * @param taskList - Task list to summarize.
 * @returns `"No tasks"` for an empty list; otherwise
 *   `"<complete>/<total> tasks complete"`, followed by ` (<n> skipped)` when
 *   at least one task is skipped.
 */
export function buildTaskSummary(taskList: GoalTaskList): string {
  const { tasks } = taskList;
  if (tasks.length === 0) return "No tasks";

  const countWithStatus = (wanted: string): number =>
    tasks.reduce((acc, task) => (task.status === wanted ? acc + 1 : acc), 0);

  const headline = `${countWithStatus("complete")}/${tasks.length} tasks complete`;
  const skippedCount = countWithStatus("skipped");
  return skippedCount > 0 ? `${headline} (${skippedCount} skipped)` : headline;
}
138
+
139
/**
 * Build the warning shown when a goal is being finished while its task list
 * still has pending tasks and `blockCompletion` is enabled.
 *
 * @param taskList - Task list of the goal.
 * @returns The warning text, or `null` when `blockCompletion` is off or no
 *   task is pending.
 */
export function taskCompletionBlockWarning(taskList: GoalTaskList): string | null {
  const pendingCount = taskList.blockCompletion
    ? taskList.tasks.filter((task) => task.status === "pending").length
    : 0;
  if (pendingCount === 0) return null;

  const plural = pendingCount > 1 ? "s" : "";
  return `${pendingCount} task${plural} still pending with blockCompletion enabled. Complete or skip all pending tasks before finishing the goal.`;
}
145
+
146
/**
 * Check a verification summary against a verification contract.
 *
 * The only rule enforced here is presence: when a non-blank contract exists,
 * a non-blank summary must accompany it. The content of the summary is not
 * compared with the contract.
 *
 * @param args.verificationContract - Contract text, if any.
 * @param args.verificationSummary - Summary supplied by the agent, if any.
 * @returns `{ ok: true }` when there is no contract or a summary was given;
 *   otherwise `{ ok: false }` with an explanatory message.
 */
export function validateVerificationSummary(args: {
  verificationContract?: string | null;
  verificationSummary?: string | null;
}): PolicyValidation {
  const hasContract = Boolean(args.verificationContract?.trim());
  const hasSummary = Boolean(args.verificationSummary?.trim());

  if (!hasContract || hasSummary) {
    return { ok: true };
  }

  return {
    ok: false,
    message:
      "This goal has a verification contract but no verificationSummary was provided. Provide a verificationSummary that addresses the contract requirements.",
  };
}
164
+
165
/**
 * Check whether a task of the given goal can be marked complete.
 *
 * Fails when there is no goal, the goal has no task list, the task id is not
 * in the list, or the task is already complete or skipped.
 *
 * @param args.goal - Goal that owns the task list, or `null`.
 * @param args.taskId - Id of the task to complete (matched exactly).
 * @returns `{ ok: true }` when the task can be completed; otherwise
 *   `{ ok: false }` with the reason.
 */
export function validateTaskCompletion(args: {
  goal: GoalPolicyRecordLike | null;
  taskId: string;
}): PolicyValidation {
  const { goal, taskId } = args;
  if (!goal) return { ok: false, message: "No goal is set." };
  if (!goal.taskList) return { ok: false, message: "Goal has no task list." };

  const task = goal.taskList.tasks.find((candidate) => candidate.id === taskId);
  if (!task) return { ok: false, message: `Task "${taskId}" not found.` };
  if (task.status === "complete") return { ok: false, message: `Task "${taskId}" is already complete.` };
  if (task.status === "skipped") return { ok: false, message: `Task "${taskId}" was already skipped.` };
  return { ok: true };
}

/**
 * Check whether a task of the given goal can be marked skipped.
 *
 * Applies the same goal/task checks as {@link validateTaskCompletion} (same
 * order, same messages) and then additionally requires a non-blank reason.
 *
 * @param args.goal - Goal that owns the task list, or `null`.
 * @param args.taskId - Id of the task to skip (matched exactly).
 * @param args.reason - Why the task is being skipped; must not be blank.
 * @returns `{ ok: true }` when the task can be skipped; otherwise
 *   `{ ok: false }` with the reason.
 */
export function validateTaskSkip(args: {
  goal: GoalPolicyRecordLike | null;
  taskId: string;
  reason: string;
}): PolicyValidation {
  // Reuse the completion checks so the two validators cannot drift apart.
  const taskCheck = validateTaskCompletion({ goal: args.goal, taskId: args.taskId });
  if (!taskCheck.ok) return taskCheck;

  if (!args.reason.trim()) return { ok: false, message: "skip_task requires a non-empty reason." };
  return { ok: true };
}
192
+
193
/**
 * Validate a proposed task list before it is attached to a goal.
 *
 * Rules, checked in this order:
 * 1. a goal must be set;
 * 2. the list may hold at most 50 tasks;
 * 3. every task needs a non-blank id and a non-blank title;
 * 4. ids must be unique. Uniqueness is judged on the trimmed id, consistent
 *    with the blank-id check, so ids that differ only by surrounding
 *    whitespace are rejected as duplicates.
 *
 * @param args.goal - Goal the list is proposed for, or `null`.
 * @param args.tasks - Proposed tasks (id and title).
 * @returns `{ ok: true }` when the proposal is acceptable; otherwise
 *   `{ ok: false }` with the first violation found.
 */
export function validateTaskListProposal(args: {
  goal: GoalPolicyRecordLike | null;
  tasks: { id: string; title: string }[];
}): PolicyValidation {
  const maxTasks = 50;

  if (!args.goal) return { ok: false, message: "No goal is set." };
  if (args.tasks.length > maxTasks) return { ok: false, message: `Task list cannot exceed ${maxTasks} tasks.` };

  const seenIds = new Set<string>();
  for (const task of args.tasks) {
    const normalizedId = task.id.trim();
    if (!normalizedId) return { ok: false, message: "All tasks must have a non-empty id." };
    if (!task.title.trim()) return { ok: false, message: `Task "${task.id}" must have a non-empty title.` };
    // Compare normalized ids: "a" and " a " look identical once rendered.
    if (seenIds.has(normalizedId)) return { ok: false, message: `Duplicate task id: "${task.id}".` };
    seenIds.add(normalizedId);
  }
  return { ok: true };
}
208
+
209
+ export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null; taskSummary?: string | null }): string {
128
210
  const auditSkipped = args.auditSkippedReason?.trim();
129
211
  const auditorReport = args.auditorReport?.trim();
130
212
  const lines = auditSkipped
@@ -136,6 +218,10 @@ export function buildCompletionReport(args: { detailedSummary: string; completio
136
218
  if (summary) {
137
219
  lines.push("", "Completion summary:", summary);
138
220
  }
221
+ const taskSummary = args.taskSummary?.trim();
222
+ if (taskSummary) {
223
+ lines.push("", `Task summary: ${taskSummary}`);
224
+ }
139
225
  lines.push("", args.detailedSummary);
140
226
  return lines.join("\n");
141
227
  }
@@ -318,7 +318,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
318
318
  const selected = i === optionIndex;
319
319
  const prefix = selected ? theme.fg("accent", "> ") : " ";
320
320
  const recTag = !opt.isCustom && q?.recommended === i ? theme.fg("success", " ★") : "";
321
- add(prefix + theme.fg(selected ? "accent" : "text", `${i + 1}. ${opt.label}`) + recTag);
321
+ addWrapped(prefix + theme.fg(selected ? "accent" : "text", `${i + 1}. ${opt.label}`) + recTag);
322
322
  }
323
323
  }
324
324
 
@@ -4,6 +4,25 @@ export type GoalEventKind = "checkpoint" | "stale" | "drafting";
4
4
  export type DraftingFocus = "goal" | "sisyphus";
5
5
  export type GoalFocusReason = "created" | "selected" | "resumed" | "completed" | "cleared" | "aborted" | "migrated";
6
6
 
7
/** Lifecycle state of a single task within a goal's task list. */
export type TaskStatus =
  | "pending"
  | "complete"
  | "skipped";
8
+
9
/**
 * One entry of a goal's task list.
 *
 * Only `id`, `title` and `status` are required; the remaining fields are
 * optional annotations.
 */
export interface GoalTask {
  /** Identifier used to look the task up within its list. */
  id: string;

  /** Short human-readable description of the task. */
  title: string;

  /** Current lifecycle state. */
  status: TaskStatus;

  /** NOTE(review): presumably a timestamp set when the task is completed — confirm against the writer. */
  completedAt?: string;

  /** NOTE(review): presumably a timestamp set when the task is skipped — confirm against the writer. */
  skippedAt?: string;

  /** NOTE(review): presumably the evidence recorded on completion — confirm against the writer. */
  evidence?: string;

  /** NOTE(review): presumably the reason recorded when the task is skipped — confirm against the writer. */
  skipReason?: string;

  /** Optional per-task verification contract text. */
  verificationContract?: string;
}
19
+
20
/** Task breakdown attached to a goal. */
export interface GoalTaskList {
  /** The tasks in the list. */
  tasks: GoalTask[];

  /** NOTE(review): presumably whether unfinished tasks prevent goal completion — confirm against the consumer. */
  blockCompletion: boolean;

  /** NOTE(review): presumably a timestamp string recording when the list was proposed — confirm. */
  proposedAt: string;
}
25
+
7
26
  export interface GoalUsage {
8
27
  tokensUsed: number;
9
28
  activeSeconds: number;
@@ -24,6 +43,9 @@ export interface GoalRecord {
24
43
  // Set by the agent's pause_goal tool. Cleared when the goal becomes active again.
25
44
  pauseReason?: string;
26
45
  pauseSuggestedAction?: string;
46
+ taskList?: GoalTaskList;
47
+ /** Plain-text description of what verification evidence is required before completing this goal. */
48
+ verificationContract?: string;
27
49
  }
28
50
 
29
51
  export interface GoalStateEntry {
@@ -90,7 +112,13 @@ export function emptyUsage(): GoalUsage {
90
112
  }
91
113
 
92
114
  export function cloneGoal(goal: GoalRecord): GoalRecord {
93
- return { ...goal, usage: { ...goal.usage } };
115
+ return {
116
+ ...goal,
117
+ usage: { ...goal.usage },
118
+ taskList: goal.taskList
119
+ ? { ...goal.taskList, tasks: goal.taskList.tasks.map(t => ({ ...t })) }
120
+ : undefined,
121
+ };
94
122
  }
95
123
 
96
124
  export function goalFocusDetails(focusedGoalId: string | null, reason: GoalFocusReason): GoalFocusEntry {
@@ -136,6 +164,38 @@ export function normalizeUsage(value: unknown): GoalUsage {
136
164
  return { tokensUsed, activeSeconds };
137
165
  }
138
166
 
167
/**
 * Converts an untrusted value (for example parsed persisted state) into a
 * well-formed task list.
 *
 * Entries that are not plain objects, or that lack a non-blank string `id` or
 * `title`, are dropped. Ids and titles are trimmed. Any `status` other than
 * `"complete"` or `"skipped"` becomes `"pending"`. Optional fields are kept
 * only when they are strings.
 *
 * When several entries share the same trimmed id, only the first is kept.
 * Lookups by id resolve to the first match, so a later duplicate could never
 * be completed or skipped.
 *
 * @param value - Raw value to normalize.
 * @returns The normalized list, or `undefined` when `value` is not a record,
 *   has no `tasks` array, or yields no valid task.
 */
export function normalizeTaskList(value: unknown): GoalTaskList | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;
  const tasksRaw = raw.tasks;
  if (!Array.isArray(tasksRaw)) return undefined;

  // Shared narrowing for the optional free-text fields.
  const optionalString = (field: unknown): string | undefined =>
    typeof field === "string" ? field : undefined;

  const seenIds = new Set<string>();
  const tasks: GoalTask[] = [];
  for (const item of tasksRaw) {
    if (!item || typeof item !== "object" || Array.isArray(item)) continue;
    const t = item as Record<string, unknown>;
    const id = typeof t.id === "string" ? t.id.trim() : "";
    const title = typeof t.title === "string" ? t.title.trim() : "";
    if (!id || !title) continue;
    // First occurrence wins; later entries with the same id are unreachable by id lookup.
    if (seenIds.has(id)) continue;
    seenIds.add(id);
    const status: TaskStatus = t.status === "complete" ? "complete" : t.status === "skipped" ? "skipped" : "pending";
    tasks.push({
      id,
      title,
      status,
      completedAt: optionalString(t.completedAt),
      skippedAt: optionalString(t.skippedAt),
      evidence: optionalString(t.evidence),
      skipReason: optionalString(t.skipReason),
      verificationContract: optionalString(t.verificationContract),
    });
  }
  if (tasks.length === 0) return undefined;
  return {
    tasks,
    // Only a literal `true` enables blocking; anything else is treated as false.
    blockCompletion: raw.blockCompletion === true,
    proposedAt: typeof raw.proposedAt === "string" ? raw.proposedAt : nowIso(),
  };
}
198
+
139
199
  export function normalizeGoalRecord(value: unknown): GoalRecord | null {
140
200
  const raw = asRecord(value);
141
201
  if (!raw) return null;
@@ -167,5 +227,7 @@ export function normalizeGoalRecord(value: unknown): GoalRecord | null {
167
227
  stopReason: raw.stopReason === "agent" || raw.stopReason === "user" ? raw.stopReason : undefined,
168
228
  pauseReason: typeof raw.pauseReason === "string" && raw.pauseReason.trim() ? raw.pauseReason : undefined,
169
229
  pauseSuggestedAction: typeof raw.pauseSuggestedAction === "string" && raw.pauseSuggestedAction.trim() ? raw.pauseSuggestedAction : undefined,
230
+ taskList: normalizeTaskList(raw.taskList),
231
+ verificationContract: typeof raw.verificationContract === "string" ? raw.verificationContract : undefined,
170
232
  };
171
233
  }