npm - pi-goal-x - Versions diffs - 0.11.0 → 0.13.0 - Mend

pi-goal-x 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +22 -3
package/docs/agent-flow-design.md +7 -7
package/docs/agentic-runtime-prd.md +2 -2
package/docs/architecture.md +1 -1
package/extensions/goal-auditor.ts +47 -31
package/extensions/goal-compaction.ts +9 -0
package/extensions/goal-draft.ts +54 -0
package/extensions/goal-ledger.ts +16 -1
package/extensions/goal-policy.ts +88 -2
package/extensions/goal-questionnaire.ts +1 -1
package/extensions/goal-record.ts +63 -1
package/extensions/goal-tool-names.ts +12 -4
package/extensions/goal.ts +406 -83
package/extensions/prompts/goal-prompts.ts +82 -10
package/extensions/storage/goal-files.ts +27 -1
package/extensions/widgets/goal-widget.ts +19 -2
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -10,6 +10,22 @@ The extension is designed around one rule: **the user owns intent; the agent exe
 All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are preserved. The following changes are specific to pi-goal-x:
+### Verification contract system
+- **Per-goal verification contracts** — when drafting a goal, include a `Verification contract:` section with plain-text requirements (e.g. "Run npm test (0 failures), grep for remaining STP references"). The contract is extracted, stored on the goal record, and enforced by the `complete_goal` tool — the call is rejected unless the agent provides a non-empty `verificationSummary`. The tool validator only checks that the summary is present; whether it actually addresses every contract item is checked by the completion auditor.
+- **Per-task verification contracts** — `propose_task_list` supports an optional `verificationContract` per task. If set, `complete_task` requires a non-empty `verificationSummary`.
+- **Both prompt and tool enforcement** — prompts include a VERIFICATION CONTRACT section instructing the agent; tool validators reject calls that violate the contract.
+- **Backward compatible** — goals/tasks without a `Verification contract:` section work exactly as before. No contract = no enforcement.
+- **Auditor integration** — the independent completion auditor receives both the `verificationContract` and `verificationSummary` and cross-checks claims against real artifacts.
+- **`complete_goal` `testResults` removed** — replaced with `verificationSummary`. The old structured test results interface is gone.
+### Task list system
+- **Structured task breakdown** — the agent can propose a task list via `propose_task_list`, which shows the user a Confirm / Continue Chatting dialog (mirrors the `propose_goal_draft` pattern). Once confirmed, tasks are displayed in prompts, the widget, serialized to disk, and included in auditor review.
+- **Per-task completion** — `complete_task` marks individual tasks done with optional evidence, and `skip_task` marks tasks as skipped with a required reason. Neither stops the turn, so the agent can continue uninterrupted.
+- **Optional `taskList`** — goals without a task list work exactly as before. The feature is entirely opt-in.
+- **Soft `complete_goal` gate** — when `blockCompletion: true` is set, `complete_goal` surfaces a warning if pending tasks remain (prompt-level only; the agent can still complete).
 ### Goal objective is immutable
 - The goal objective is immutable — the agent **must not** modify it autonomously. Objective changes are only possible through `propose_goal_tweak`, which presents the user with a Confirm / Continue Chatting dialog matching the `propose_goal_draft` confirmation pattern. This prevents the agent from silently changing the goal contract.
@@ -23,7 +39,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
 ### E2e test infrastructure
 - **Deterministic fork tests using `--mode json`**: the e2e suite spawns a real `pi --fork --mode json` session, parses structured `tool_execution_start`/`tool_execution_end` JSON events for field-level assertions — no free-text AI output parsing. Uses `--append-system-prompt` + `--tools` to force deterministic tool calls.
-- **Full coverage**: 143 tests total — function-level integration tests (12), mock-pi handler tests (4), file-validity checks (6), real `pi --fork --mode json` tests (3 scenarios: quick-sync, combined sync+complete, deferred archival), and propose_goal_tweak unit/integration/e2e tests (15).
+- **Full coverage**: 205 tests total — function-level integration tests (12), mock-pi handler tests (4), file-validity checks (6), real `pi --fork --mode json` tests (3 scenarios), propose_goal_tweak unit/integration/e2e tests (15), task list policy/round-trip/render tests (50+), and verification contract tests (14).
 ### Completion auditor
@@ -158,9 +174,12 @@ The extension exposes tools only when they make sense for the current lifecycle
 | `get_goal` | always | Read the focused goal state; mentions other open goals when present |
 | `propose_goal_draft` | drafting only (goal creation) | Submit a concrete draft for user confirmation |
 | `propose_goal_tweak` | tweak drafting only | Submit a revision to an existing goal (shows Confirm / Continue Chatting dialog) |
-| `update_goal` | focused active or paused goal | Mark the focused goal complete when all requirements are satisfied. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
+| `complete_goal` | focused active or paused goal | Mark the focused goal complete — supply a `verificationSummary` covering all contract items. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
 | `pause_goal` | focused active goal | Pause the focused goal because of a real blocker |
 | `abort_goal` | focused active or paused goal | Abort/archive an obsolete, impossible, unsafe, or user-cancelled focused goal |
+| `propose_task_list` | active or paused goal | Propose a structured task list for user confirmation (stops the turn) |
+| `complete_task` | active or paused goal | Mark a task complete with optional `verificationSummary`. If the task has a `verificationContract`, the summary is required (does not stop turn) |
+| `skip_task` | active or paused goal | Mark a task skipped with a required reason (does not stop turn) |
 | `propose_goal_tweak` | tweak drafting only | Submit a revision to the focused goal (shows Confirm / Continue Chatting dialog) |
 | `step_complete` | hidden / legacy | Compatibility no-op; Sisyphus no longer requires a step counter |
 | `create_goal` | hidden | Direct calls are rejected; normal creation goes through `propose_goal_draft` |
@@ -228,7 +247,7 @@ The shipped gates are intentionally small and mechanical.
 | Completion auditor gate | Archiving completion unless an independent pi auditor agent returns `<approved/>` |
 | Abort gate | Aborting missing, stale, completed, or reasonless goals |
 | Direct-create rejection | Hidden `create_goal` calls creating goals without the confirmation flow |
-| Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `update_goal`, or `propose_goal_tweak` stops the turn |
+| Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `complete_goal`, or `propose_goal_tweak` stops the turn |
 | Empty-turn guard | Pure chat loops that would keep auto-continuing without meaningful goal work |
 | Abort pause | Active goals staying active after user abort / Ctrl-C |
 | Disk reconciliation | External pause/archive/delete/status changes being ignored or overwritten by stale memory |

package/docs/agent-flow-design.md CHANGED Viewed

@@ -28,7 +28,7 @@
   -> runtime 重新计算 prompt 与 tool surface
   -> 执行 agent 按 focused goal 工作
   -> tool call / turn event 更新 accounting 与 ledger
-  -> 执行 agent 调用 update_goal 请求完成
+  -> 执行 agent 调用 complete_goal 请求完成
   -> 独立 auditor agent 检查完成声明
   -> 只有 auditor approval 才归档为 complete
 ```
@@ -43,7 +43,7 @@
   -> 用户确认
   -> 写入 active goal 文件并设置 focus
   -> agent 跨一个或多个 turn 执行工作
-  -> agent 调用 update_goal(status="complete")
+  -> agent 调用 complete_goal(status="complete")
   -> 对话中出现 Goal audit started
   -> auditor session 检查真实产物
   -> 对话中出现 Goal audit approved
@@ -200,7 +200,7 @@ interface GoalConfirmationIntent {
 | `goal_question` / `goal_questionnaire` | goal confirmation / tweak drafting 中的结构化用户对话。 |
 | `propose_goal_draft` | 提交 goal 草案给用户确认；没有 confirmation intent 时会被 validator 拒绝。 |
 | `apply_goal_tweak` | 提交并应用 goal 修改。 |
-| `update_goal` | 请求完成目标，并触发独立审计。 |
+| `complete_goal` | 请求完成目标，并触发独立审计。 |
 | `pause_goal` | agent 因真实 blocker 暂停目标。 |
 | `abort_goal` | agent 因目标废弃、不可行、不安全等原因中止目标。 |
 | `step_complete` | 隐藏的 legacy no-op；Sisyphus 不再使用 step counter。 |
@@ -267,7 +267,7 @@ completion 不信任执行 agent 单方声明，而是一个双 agent 协议。
 }
 ```
-`update_goal` 会先校验 focused goal 是否可以完成，然后写入 `completion_requested` ledger event。
+`complete_goal` 会先校验 focused goal 是否可以完成，然后写入 `completion_requested` ledger event。
 ### 9.2 对话中出现 audit started
@@ -281,7 +281,7 @@ Auditor model: ...
 Completion claim: ...
 ```
-这让 audit 成为 transcript 里一个明确的 agentic 阶段，而不是隐藏在 `update_goal` tool result 里。
+这让 audit 成为 transcript 里一个明确的 agentic 阶段，而不是隐藏在 `complete_goal` tool result 里。
 ### 9.3 独立 auditor session
@@ -343,7 +343,7 @@ Audit Report 或 rejection reason
 agent 可以在真实 blocker 下调用 `pause_goal`。用户也可以用 `/goal-pause` 或 abort active run 来暂停目标。
-`pause_goal`、`abort_goal`、`update_goal`、`apply_goal_tweak` 成功后，会设置 `turnStoppedFor`。之后同一个 turn 里，`tool_call` hook 会阻止额外的非允许工具调用。这个 hard gate 仍然保留：生命周期已经 stop 后，agent 应该总结并交还控制，而不是继续修改文件。
+`pause_goal`、`abort_goal`、`complete_goal`、`apply_goal_tweak` 成功后，会设置 `turnStoppedFor`。之后同一个 turn 里，`tool_call` hook 会阻止额外的非允许工具调用。这个 hard gate 仍然保留：生命周期已经 stop 后，agent 应该总结并交还控制，而不是继续修改文件。
 pause 与 abort 的区别：
@@ -391,7 +391,7 @@ Execution runtime
   v
 Executor agent
   |-- 正常 read/write/bash/edit 工作
-  |-- pause_goal / abort_goal / update_goal
+  |-- pause_goal / abort_goal / complete_goal
   v
 Completion request
   |-- 对话中出现 Goal audit started

package/docs/agentic-runtime-prd.md CHANGED Viewed

@@ -131,7 +131,7 @@ The following behaviors remain runtime-enforced:
 4. **Mode consistency.** A draft proposal cannot silently change `/goals` into Sisyphus or `/sisyphus` into a regular goal.
 5. **Stale continuation protection.** A queued continuation for an old goal cannot perform work for a different current goal.
 6. **Human-owned focus.** The agent cannot silently switch focus between open goals.
-7. **Completion audit.** `update_goal(status="complete")` archives only if the independent auditor returns exactly one approving marker.
+7. **Completion audit.** `complete_goal(status="complete")` archives only if the independent auditor returns exactly one approving marker.
 8. **Path safety.** Goal files and archives must remain under expected `.pi/goals` paths.
 9. **Post-stop transaction boundary.** After pause, abort, approved completion, or applied tweak, the same turn should not continue substantive work.
 10. **No hard cost control/cap lifecycle.** Resource-control is outside this runtime; auto-continue uses semantic stop conditions and the empty-turn guard.
@@ -211,7 +211,7 @@ The runtime keeps tools for irreversible transitions:
 - `propose_goal_draft`
 - `get_goal`
-- `update_goal`
+- `complete_goal`
 - `pause_goal`
 - `abort_goal`
 - `apply_goal_tweak`

package/docs/architecture.md CHANGED Viewed

@@ -193,7 +193,7 @@ Continuation prompts include a goal id so stale prompts can be detected and neut
 ## Completion output
-Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `update_goal(status="complete")` is valid for active and paused goals; paused goals do not need to be resumed just to record completion when existing evidence is sufficient.
+Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `complete_goal(status="complete")` is valid for active and paused goals; paused goals do not need to be resumed just to record completion when existing evidence is sufficient.
 Before archiving, the tool starts a separate in-memory pi session with a focused auditor prompt. The auditor receives the objective, executor completion summary, and goal metadata, can inspect the workspace with `read`, `grep`, `find`, `ls`, and `bash`, and must end with exactly one marker:

package/extensions/goal-auditor.ts CHANGED Viewed

@@ -13,7 +13,7 @@ import {
 	type ExtensionContext,
 	type ResourceLoader,
 } from "@earendil-works/pi-coding-agent";
-import type { GoalRecord } from "./goal-record.ts";
+import type { GoalRecord, GoalTaskList } from "./goal-record.ts";
 export interface GoalAuditorConfig {
 	provider?: string;
@@ -127,22 +127,34 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
 	return { approved: approved && !disapproved, disapproved };
 }
-export interface AuditorTestResults {
-	/** Exit code of the test run (0 = success) */
-	exitCode: number;
-	/** Test suite name, e.g. 'npm test' */
-	suiteName?: string;
-	/** Last lines of test output showing results */
-	output?: string;
-	/** ISO timestamp of when tests were run */
-	timestamp?: string;
/**
 * Verification material that accompanies a completion claim.
 */
export interface AuditorVerificationEvidence {
	/** Free-text account, written by the executing agent, of the checks it performed. */
	summary: string;
	/** Verification contract attached to the goal, when one exists: the checks the agent was required to perform. */
	contract?: string;
}
/**
 * Render a goal's task list as a plain-text block.
 *
 * The first line is a progress header: `Tasks: <complete>/<total> complete`, followed by
 * `, <n> skipped` when at least one task is skipped. When `blockCompletion` is set and at
 * least one task is still pending, the header also ends with a `TASK GATE` notice.
 * Each following line describes one task, marked `[x]` (complete), `[~]` (skipped) or
 * `[ ]` (any other status).
 *
 * @param taskList - The task list to render; may be omitted or null.
 * @returns The rendered block, or an empty string when there is no list or it has no tasks.
 */
function taskSummaryBlock(taskList?: GoalTaskList | null): string {
	if (!taskList || taskList.tasks.length === 0) return "";

	// Count statuses and build the per-task lines in a single pass.
	let complete = 0;
	let skipped = 0;
	let pending = 0;
	const taskLines: string[] = [];
	for (const task of taskList.tasks) {
		let marker = "[ ]";
		if (task.status === "complete") {
			complete += 1;
			marker = "[x]";
		} else if (task.status === "skipped") {
			skipped += 1;
			marker = "[~]";
		} else if (task.status === "pending") {
			pending += 1;
		}
		taskLines.push(`  ${marker} ${task.id}: ${task.title}`);
	}

	// Build the header in one expression so no indexed access (and no non-null assertion) is needed.
	const skippedNote = skipped > 0 ? `, ${skipped} skipped` : "";
	const gate = taskList.blockCompletion && pending > 0 ? " | TASK GATE: pending tasks block completion" : "";
	const header = `Tasks: ${complete}/${taskList.tasks.length} complete${skippedNote}${gate}`;
	return [header, ...taskLines].join("\n");
}
 export function buildGoalAuditorPrompt(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
-	testResults?: AuditorTestResults | null;
+	verificationSummary?: string | null;
 }): string {
 	return [
 		"You are the independent completion auditor for pi-goal.",
@@ -168,32 +180,36 @@ export function buildGoalAuditorPrompt(args: {
 		"Current goal metadata:",
 		"<goal_details>",
 		args.detailedSummary,
+		...(taskSummaryBlock(args.goal.taskList) ? ["", taskSummaryBlock(args.goal.taskList)] : []),
 		"</goal_details>",
-		...(args.testResults ? [
+		...(args.verificationSummary?.trim() ? [
 			"",
-			"Executor test evidence:",
-			"<test_evidence>",
-			`  Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
-			`  Exit code: ${args.testResults.exitCode}`,
-			`  Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
-			`  Output:`,
-			...(args.testResults.output ? args.testResults.output.split("\n").map((l) => `    ${l}`) : ["    (none provided)"]),
-			"</test_evidence>",
+			"Executor verification summary:",
+			"<verification_summary>",
+			args.verificationSummary.trim(),
+			"</verification_summary>",
+		] : []),
+		...(args.goal.verificationContract?.trim() ? [
+			"",
+			"Goal verification contract (what the executor was required to verify):",
+			"<verification_contract>",
+			args.goal.verificationContract.trim(),
+			"</verification_contract>",
 		] : []),
 		"",
 		"Audit checklist:",
-		...(args.testResults ? [
-			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
-			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
-			"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
-			"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
-			"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
-		] : [
+		...[
 			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
 			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
-			"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
-			"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
-		]),
+			...(args.verificationSummary?.trim()
+				? ["3. Check the <verification_summary> against real artifacts. If the executor claims to have run tests or searched for references, verify those claims with actual file/shell evidence. The summary is a claim, not proof — cross-check it."]
+				: []),
+			...(args.goal.verificationContract?.trim()
+				? ["4. Verify that the executor has satisfied every item in the <verification_contract>. If any item is missing or weakly addressed, disapprove."]
+				: []),
+			"5. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
+			"6. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		],
 		"",
 		"Progress reporting:",
 		"You have the report_auditor_progress tool available to report your progress to the user.",
@@ -271,7 +287,7 @@ export async function runGoalCompletionAuditor(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
-	testResults?: AuditorTestResults | null;
+	verificationSummary?: string | null;
 	signal?: AbortSignal;
 	onProgress?: AuditorProgressCallback;
 	/**

package/extensions/goal-compaction.ts CHANGED Viewed

@@ -46,6 +46,15 @@ export function buildGoalCompactSummary(goal: GoalRecord, events: GoalLedgerEven
         case "goal_completed":
           lines.push("    - completed");
           break;
+        case "task_list_set":
+          lines.push(`    - task list set: ${event.taskCount} tasks${event.blockCompletion ? " (blocking)" : ""}`);
+          break;
+        case "task_complete":
+          lines.push(`    - task complete: ${event.taskId}${event.evidence ? ` — ${truncateText(event.evidence, 60)}` : ""}`);
+          break;
+        case "task_skipped":
+          lines.push(`    - task skipped: ${event.taskId} — ${truncateText(event.reason, 60)}`);
+          break;
         case "goal_aborted":
           lines.push(`    - aborted: ${event.reason}`);
           break;

package/extensions/goal-draft.ts CHANGED Viewed

@@ -26,6 +26,57 @@ export function promptSafeObjective(objective: string): string {
 	return objective.replace(/<\/?untrusted_objective>/gi, (tag) => tag.replace(/</g, "&lt;").replace(/>/g, "&gt;"));
 }
/** Matches one `Verification contract: <text>` line (case-insensitive) and captures `<text>`. */
const VERIFICATION_CONTRACT_RE = /^Verification contract:\s*(.+)$/im;

/**
 * Lower-cased section headings conventionally used in goal drafts.
 *
 * NOTE(review): nothing in the visible code reads this list — presumably it is reserved for
 * section-aware parsing of multi-line contracts. Confirm the intent or remove it.
 */
const CONVENTIONAL_SECTION_NAMES = [
	"success criteria",
	"boundaries",
	"constraints",
	"if blocked",
	"if blocked / unclear / failing",
	"don'ts",
	"sisyphus reminder",
	"objective",
	"目标",
	"ordered steps",
	"order rules",
	"steps",
];

/**
 * Split a goal objective into the objective text and its verification contract.
 *
 * A contract line has the form `Verification contract: <text>` on a single line
 * (matched case-insensitively, ignoring leading/trailing whitespace). It can appear
 * anywhere in the objective; by convention it follows the other sections
 * (Success criteria, Boundaries, Constraints).
 *
 * Every contract line is removed from the returned objective. If several contract
 * lines are present, their texts are joined with newlines in order of appearance,
 * so no requirement is dropped. Carriage returns are stripped from the input first.
 *
 * @param objective - Raw objective text, possibly containing contract lines.
 * @returns The objective without contract lines, plus `verificationContract`
 *          (undefined when no contract line was found).
 */
export function extractVerificationContract(objective: string): { objective: string; verificationContract?: string } {
	const contractParts: string[] = [];
	const keptLines: string[] = [];
	for (const line of objective.replace(/\r/g, "").split("\n")) {
		const match = VERIFICATION_CONTRACT_RE.exec(line.trim());
		if (!match) {
			keptLines.push(line);
			continue;
		}
		// Contract lines are dropped from the objective; the optional chain guards the capture group.
		const text = match[1]?.trim();
		if (text) contractParts.push(text);
	}
	return {
		objective: keptLines.join("\n"),
		verificationContract: contractParts.length > 0 ? contractParts.join("\n") : undefined,
	};
}
 export function buildDraftConfirmationText(args: {
 	focus: GoalDraftingFocus;
 	originalTopic: string;
@@ -131,6 +182,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
 		"- If the topic is already concrete, you may proceed directly to propose_goal_draft.",
 		"- The goal contract should make the objective, success criteria, boundaries, constraints, and blocker rule explicit.",
 		"- Keep grilling assumptions until the objective, success criteria, boundaries, constraints, and blocker rule are clear enough to confirm.",
+		"- After a goal is confirmed, you may call propose_task_list on the first continuation turn if the objective naturally decomposes into trackable milestones. Do not add a task list for simple, single-step goals.",
 		"- propose_goal_draft opens the user's Confirm / Continue Chatting dialog. Confirm creates and focuses the goal; Continue Chatting means keep refining through normal proposal cycles.",
 		"- create_goal is not a shortcut. Direct create_goal calls are rejected so the user keeps explicit say in goal creation.",
 	];
@@ -142,6 +194,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
 		"Success criteria: <observable evidence the goal is done>",
 		"Boundaries: <in scope / out of scope>",
 		"Constraints: <hard rules>",
+		"Verification contract: <optional — what verification evidence is required before marking complete, e.g. 'Run npm test (0 failures), grep for remaining references, re-read requirements and confirm every item is addressed'>",
 		"If blocked: <default = stop and ask the user>",
 		"Call propose_goal_draft with sisyphus=false and autoContinue=true unless the user asked otherwise.",
 	];
@@ -154,6 +207,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
 		"Success criteria: <observable evidence the whole ordered goal is done>",
 		"Boundaries: <in scope / out of scope>",
 		"Constraints: <hard rules, files not to touch, etc.>",
+		"Verification contract: <optional — what verification evidence is required before marking complete>",
 		"Ordered steps: <preserve the user's requested steps and ordering; do not add preflight or reconnaissance steps they did not ask for>",
 		"If blocked / unclear / failing: <default = stop and ask the user>",
 		"Sisyphus reminder: Work patiently and sequentially. No rushing, no unrequested preflight steps, no improvising around blockers.",

package/extensions/goal-ledger.ts CHANGED Viewed

@@ -16,7 +16,10 @@ export type GoalLedgerEvent =
   | { type: "audit_result"; goalId: string; verdict: "approved" | "disapproved" | "error"; report: string; at: string }
   | { type: "audit_skipped"; goalId: string; reason: "disabled" | "user_aborted"; provider?: string; model?: string; thinkingLevel?: string; at: string }
   | { type: "goal_completed"; goalId: string; archivePath?: string; at: string }
-  | { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string };
+  | { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string }
+  | { type: "task_list_set"; goalId: string; taskCount: number; blockCompletion: boolean; at: string }
+  | { type: "task_complete"; goalId: string; taskId: string; evidence?: string; at: string }
+  | { type: "task_skipped"; goalId: string; taskId: string; reason: string; at: string };
 export interface GoalLedgerContext {
   cwd: string;
@@ -147,6 +150,12 @@ function isValidLedgerEvent(value: unknown): value is GoalLedgerEvent {
       return typeof obj.goalId === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
     case "goal_aborted":
       return typeof obj.goalId === "string" && typeof obj.reason === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
+    case "task_list_set":
+      return typeof obj.goalId === "string" && typeof obj.taskCount === "number" && typeof obj.blockCompletion === "boolean";
+    case "task_complete":
+      return typeof obj.goalId === "string" && typeof obj.taskId === "string" && (obj.evidence === undefined || typeof obj.evidence === "string");
+    case "task_skipped":
+      return typeof obj.goalId === "string" && typeof obj.taskId === "string" && typeof obj.reason === "string";
     default:
       return false;
   }
@@ -176,6 +185,12 @@ function sanitizeEvent(event: GoalLedgerEvent): GoalLedgerEvent {
       return { ...event, goalId: safeGoalId(event.goalId) };
     case "goal_aborted":
       return { ...event, goalId: safeGoalId(event.goalId) };
+    case "task_list_set":
+      return { ...event, goalId: safeGoalId(event.goalId) };
+    case "task_complete":
+      return { ...event, goalId: safeGoalId(event.goalId) };
+    case "task_skipped":
+      return { ...event, goalId: safeGoalId(event.goalId) };
     case "goal_unfocused":
       return event;
   }

package/extensions/goal-policy.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { statusLabel, type GoalDisplayRecordLike } from "./goal-core.ts";
+import type { GoalTaskList, TaskStatus } from "./goal-record.ts";
 export type GoalStatusLike = "active" | "paused" | "complete";
 export type StopReasonLike = "user" | "agent";
@@ -9,6 +10,7 @@ export interface GoalPolicyRecordLike extends GoalDisplayRecordLike {
 	updatedAt?: string;
 	pauseReason?: string;
 	pauseSuggestedAction?: string;
+	taskList?: GoalTaskList;
 }
 export type PolicyValidation =
@@ -39,7 +41,7 @@ export function validateGoalCompletion(args: {
 	const { goal, runningGoalId } = args;
 	if (!goal) return { ok: false, message: "No goal is set." };
 	if (runningGoalId && goal.id !== runningGoalId) return { ok: false, message: "The active goal changed during this run; not marking it complete." };
-	if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)}; update_goal does not apply.` };
+	if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)}; complete_goal does not apply.` };
 	return { ok: true };
 }
@@ -124,7 +126,87 @@ export function abortGoalCommandMessage(args: { archived: boolean; wasDrafting:
 	return args.archived ? "Goal aborted and archived." : args.wasDrafting ? "Drafting cancelled." : "No goal is set.";
 }
-export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null }): string {
/**
 * Summarize task-list progress on a single line.
 *
 * @param taskList - The task list to summarize.
 * @returns `"No tasks"` for an empty list; otherwise `"<complete>/<total> tasks complete"`,
 *          followed by `" (<n> skipped)"` when at least one task is skipped.
 */
export function buildTaskSummary(taskList: GoalTaskList): string {
	const { tasks } = taskList;
	if (tasks.length === 0) return "No tasks";

	let doneCount = 0;
	let skippedCount = 0;
	for (const { status } of tasks) {
		if (status === "complete") doneCount += 1;
		else if (status === "skipped") skippedCount += 1;
	}

	const headline = `${doneCount}/${tasks.length} tasks complete`;
	return skippedCount > 0 ? `${headline} (${skippedCount} skipped)` : headline;
}
/**
 * Produce the warning shown when a blocking task list still has pending tasks.
 *
 * @param taskList - The goal's task list.
 * @returns A warning naming the number of pending tasks, or `null` when
 *          `blockCompletion` is off or no task has status `"pending"`.
 */
export function taskCompletionBlockWarning(taskList: GoalTaskList): string | null {
	// Tasks are only inspected when the list is marked as blocking.
	const pendingCount = taskList.blockCompletion
		? taskList.tasks.reduce((count, task) => (task.status === "pending" ? count + 1 : count), 0)
		: 0;
	if (pendingCount === 0) return null;

	const plural = pendingCount > 1 ? "s" : "";
	return `${pendingCount} task${plural} still pending with blockCompletion enabled. Complete or skip all pending tasks before finishing the goal.`;
}
/**
 * Check that a verification summary was supplied when a verification contract exists.
 *
 * Only presence is checked: a contract that is non-empty after trimming requires a
 * summary that is non-empty after trimming. The summary's content is not compared
 * with the contract here.
 *
 * @param args.verificationContract - The contract text, if any.
 * @param args.verificationSummary - The summary text, if any.
 * @returns `{ ok: true }` when there is no contract or a summary is present;
 *          otherwise a failure carrying an explanatory message.
 */
export function validateVerificationSummary(args: {
	verificationContract?: string | null;
	verificationSummary?: string | null;
}): PolicyValidation {
	const hasContract = Boolean(args.verificationContract?.trim());
	const hasSummary = Boolean(args.verificationSummary?.trim());
	if (!hasContract || hasSummary) return { ok: true };
	return {
		ok: false,
		message: `This goal has a verification contract but no verificationSummary was provided. Provide a verificationSummary that addresses the contract requirements.`,
	};
}
/**
 * Decide whether a task may be marked complete.
 *
 * Rejects, in this order: no goal, no task list on the goal, unknown task id,
 * a task that is already complete, a task that was already skipped.
 *
 * @param args.goal - The goal that owns the task list, or null.
 * @param args.taskId - Id of the task to complete (matched exactly).
 * @returns `{ ok: true }` when the task can be completed; otherwise a failure with a message.
 */
export function validateTaskCompletion(args: {
	goal: GoalPolicyRecordLike | null;
	taskId: string;
}): PolicyValidation {
	const reject = (message: string): PolicyValidation => ({ ok: false, message });
	const { goal, taskId } = args;

	if (!goal) return reject("No goal is set.");
	const list = goal.taskList;
	if (!list) return reject("Goal has no task list.");

	const target = list.tasks.find((candidate) => candidate.id === taskId);
	if (!target) return reject(`Task "${taskId}" not found.`);

	switch (target.status) {
		case "complete":
			return reject(`Task "${taskId}" is already complete.`);
		case "skipped":
			return reject(`Task "${taskId}" was already skipped.`);
		default:
			return { ok: true };
	}
}
/**
 * Decide whether a task may be marked skipped.
 *
 * Rejects, in this order: no goal, no task list on the goal, unknown task id,
 * a task that is already complete, a task that was already skipped, and finally
 * a reason that is empty after trimming.
 *
 * @param args.goal - The goal that owns the task list, or null.
 * @param args.taskId - Id of the task to skip (matched exactly).
 * @param args.reason - Why the task is being skipped; must contain non-whitespace text.
 * @returns `{ ok: true }` when the task can be skipped; otherwise a failure with a message.
 */
export function validateTaskSkip(args: {
	goal: GoalPolicyRecordLike | null;
	taskId: string;
	reason: string;
}): PolicyValidation {
	const reject = (message: string): PolicyValidation => ({ ok: false, message });
	const { goal, taskId } = args;

	if (!goal) return reject("No goal is set.");
	const list = goal.taskList;
	if (!list) return reject("Goal has no task list.");

	const target = list.tasks.find((candidate) => candidate.id === taskId);
	if (!target) return reject(`Task "${taskId}" not found.`);

	switch (target.status) {
		case "complete":
			return reject(`Task "${taskId}" is already complete.`);
		case "skipped":
			return reject(`Task "${taskId}" was already skipped.`);
		default:
			break;
	}

	// The reason is checked last, so task-state problems are reported first.
	if (args.reason.trim().length === 0) return reject("skip_task requires a non-empty reason.");
	return { ok: true };
}
/**
 * Validate a proposed task list before it is shown to the user.
 *
 * Rejects when no goal is set, when more than 50 tasks are proposed, when any task
 * has an id or title that is empty after trimming, or when two tasks share an id.
 * Ids are compared after trimming, so ids that differ only by surrounding whitespace
 * (e.g. `"a"` and `"a "`) count as duplicates — consistent with the emptiness check,
 * which also ignores surrounding whitespace.
 *
 * @param args.goal - The goal the list is proposed for, or null.
 * @param args.tasks - Proposed tasks; only `id` and `title` are inspected.
 * @returns `{ ok: true }` when the proposal is acceptable; otherwise a failure with a message.
 */
export function validateTaskListProposal(args: {
	goal: GoalPolicyRecordLike | null;
	tasks: { id: string; title: string }[];
}): PolicyValidation {
	if (!args.goal) return { ok: false, message: "No goal is set." };
	if (args.tasks.length > 50) return { ok: false, message: "Task list cannot exceed 50 tasks." };
	const seenIds = new Set<string>();
	for (const task of args.tasks) {
		const normalizedId = task.id.trim();
		if (!normalizedId) return { ok: false, message: "All tasks must have a non-empty id." };
		if (!task.title.trim()) return { ok: false, message: `Task "${task.id}" must have a non-empty title.` };
		// Uniqueness uses the trimmed id; the message echoes the id as the caller supplied it.
		if (seenIds.has(normalizedId)) return { ok: false, message: `Duplicate task id: "${task.id}".` };
		seenIds.add(normalizedId);
	}
	return { ok: true };
}
+export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null; taskSummary?: string | null }): string {
 	const auditSkipped = args.auditSkippedReason?.trim();
 	const auditorReport = args.auditorReport?.trim();
 	const lines = auditSkipped
@@ -136,6 +218,10 @@ export function buildCompletionReport(args: { detailedSummary: string; completio
 	if (summary) {
 		lines.push("", "Completion summary:", summary);
 	}
+	const taskSummary = args.taskSummary?.trim();
+	if (taskSummary) {
+		lines.push("", `Task summary: ${taskSummary}`);
+	}
 	lines.push("", args.detailedSummary);
 	return lines.join("\n");
 }

package/extensions/goal-questionnaire.ts CHANGED Viewed

@@ -318,7 +318,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
 					const selected = i === optionIndex;
 					const prefix = selected ? theme.fg("accent", "> ") : "  ";
 					const recTag = !opt.isCustom && q?.recommended === i ? theme.fg("success", " ★") : "";
-					add(prefix + theme.fg(selected ? "accent" : "text", `${i + 1}. ${opt.label}`) + recTag);
+					addWrapped(prefix + theme.fg(selected ? "accent" : "text", `${i + 1}. ${opt.label}`) + recTag);
 				}
 			}

package/extensions/goal-record.ts CHANGED Viewed

@@ -4,6 +4,25 @@ export type GoalEventKind = "checkpoint" | "stale" | "drafting";
 export type DraftingFocus = "goal" | "sisyphus";
 export type GoalFocusReason = "created" | "selected" | "resumed" | "completed" | "cleared" | "aborted" | "migrated";
+export type TaskStatus = "pending" | "complete" | "skipped"; // state of a single task; normalizeTaskList maps any unrecognized stored value to "pending"
+export interface GoalTask { // One entry of a goal's task list (see GoalTaskList.tasks).
+  id: string; // compared with a requested task id when a task is looked up
+  title: string;
+  status: TaskStatus;
+  completedAt?: string; // presumably a timestamp recorded when the task is completed; TODO confirm against the code that sets it
+  skippedAt?: string; // presumably a timestamp recorded when the task is skipped; TODO confirm against the code that sets it
+  evidence?: string; // presumably the evidence supplied on completion; verify against caller
+  skipReason?: string; // presumably the reason supplied when the task was skipped; verify against caller
+  verificationContract?: string; // optional per-task contract; per the README, when set, complete_task requires a non-empty verificationSummary
+}
+export interface GoalTaskList { // Task list attached to a goal through GoalRecord.taskList.
+  tasks: GoalTask[];
+  blockCompletion: boolean; // presumably whether unfinished tasks block completing the goal (TODO confirm); normalizeTaskList loads anything other than literal true as false
+  proposedAt: string; // timestamp string; normalizeTaskList substitutes nowIso() when the stored value is not a string
+}
 export interface GoalUsage {
 	tokensUsed: number;
 	activeSeconds: number;
@@ -24,6 +43,9 @@ export interface GoalRecord {
 	// Set by the agent's pause_goal tool. Cleared when the goal becomes active again.
 	pauseReason?: string;
 	pauseSuggestedAction?: string;
+	taskList?: GoalTaskList;
+	/** Plain-text description of what verification evidence is required before completing this goal. */
+	verificationContract?: string;
 }
 export interface GoalStateEntry {
@@ -90,7 +112,13 @@ export function emptyUsage(): GoalUsage {
 }
 export function cloneGoal(goal: GoalRecord): GoalRecord {
-	return { ...goal, usage: { ...goal.usage } };
+	return { // copy of the goal that shares neither the usage object nor any task-list object with the input
+		...goal, // all remaining fields are copied shallowly
+		usage: { ...goal.usage },
+		taskList: goal.taskList
+			? { ...goal.taskList, tasks: goal.taskList.tasks.map(t => ({ ...t })) } // new list object, new tasks array, and a new object per task
+			: undefined, // the source goal has no task list
+	};
 }
 export function goalFocusDetails(focusedGoalId: string | null, reason: GoalFocusReason): GoalFocusEntry {
@@ -136,6 +164,38 @@ export function normalizeUsage(value: unknown): GoalUsage {
 	return { tokensUsed, activeSeconds };
 }
+export function normalizeTaskList(value: unknown): GoalTaskList | undefined { // Builds a GoalTaskList from untyped data (normalizeGoalRecord passes raw.taskList); returns undefined when the input is unusable or no valid task survives.
+	const raw = asRecord(value); // asRecord is defined elsewhere in the file; a falsy result is treated as unusable input
+	if (!raw) return undefined;
+	const tasksRaw = raw.tasks;
+	if (!Array.isArray(tasksRaw)) return undefined;
+	const tasks: GoalTask[] = [];
+	for (const item of tasksRaw) {
+		if (!item || typeof item !== "object" || Array.isArray(item)) continue; // silently skip entries that are not non-array objects
+		const t = item as Record<string, unknown>;
+		const id = typeof t.id === "string" && t.id.trim() ? t.id.trim() : ""; // ids are stored trimmed; non-string or blank ids become ""
+		const title = typeof t.title === "string" ? t.title.trim() : "";
+		if (!id || !title) continue; // drop tasks lacking a usable id or title; NOTE(review): ids are not de-duplicated here, so repeated ids are all kept
+		const status: TaskStatus = t.status === "complete" ? "complete" : t.status === "skipped" ? "skipped" : "pending"; // any other value falls back to "pending"
+		tasks.push({
+			id,
+			title,
+			status,
+			completedAt: typeof t.completedAt === "string" ? t.completedAt : undefined, // this and the optional fields below are kept only when they are strings, without trimming or emptiness checks
+			skippedAt: typeof t.skippedAt === "string" ? t.skippedAt : undefined,
+			evidence: typeof t.evidence === "string" ? t.evidence : undefined,
+			skipReason: typeof t.skipReason === "string" ? t.skipReason : undefined,
+			verificationContract: typeof t.verificationContract === "string" ? t.verificationContract : undefined,
+		});
+	}
+	if (tasks.length === 0) return undefined; // a list with no valid tasks is treated the same as no list
+	return {
+		tasks,
+		blockCompletion: raw.blockCompletion === true, // only the literal boolean true enables the flag
+		proposedAt: typeof raw.proposedAt === "string" ? raw.proposedAt : nowIso(), // a missing or non-string timestamp is replaced with nowIso()
+	};
+}
 export function normalizeGoalRecord(value: unknown): GoalRecord | null {
 	const raw = asRecord(value);
 	if (!raw) return null;
@@ -167,5 +227,7 @@ export function normalizeGoalRecord(value: unknown): GoalRecord | null {
 		stopReason: raw.stopReason === "agent" || raw.stopReason === "user" ? raw.stopReason : undefined,
 		pauseReason: typeof raw.pauseReason === "string" && raw.pauseReason.trim() ? raw.pauseReason : undefined,
 		pauseSuggestedAction: typeof raw.pauseSuggestedAction === "string" && raw.pauseSuggestedAction.trim() ? raw.pauseSuggestedAction : undefined,
+		taskList: normalizeTaskList(raw.taskList),
+		verificationContract: typeof raw.verificationContract === "string" ? raw.verificationContract : undefined,
 	};
 }