pi-goal-x 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -3
- package/docs/agent-flow-design.md +7 -7
- package/docs/agentic-runtime-prd.md +2 -2
- package/docs/architecture.md +1 -1
- package/extensions/goal-auditor.ts +47 -31
- package/extensions/goal-compaction.ts +9 -0
- package/extensions/goal-draft.ts +54 -0
- package/extensions/goal-ledger.ts +16 -1
- package/extensions/goal-policy.ts +88 -2
- package/extensions/goal-questionnaire.ts +1 -1
- package/extensions/goal-record.ts +63 -1
- package/extensions/goal-tool-names.ts +12 -4
- package/extensions/goal.ts +406 -83
- package/extensions/prompts/goal-prompts.ts +82 -10
- package/extensions/storage/goal-files.ts +27 -1
- package/extensions/widgets/goal-widget.ts +19 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,6 +10,22 @@ The extension is designed around one rule: **the user owns intent; the agent exe
|
|
|
10
10
|
|
|
11
11
|
All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are preserved. The following changes are specific to pi-goal-x:
|
|
12
12
|
|
|
13
|
+
### Verification contract system
|
|
14
|
+
|
|
15
|
+
- **Per-goal verification contracts** — when drafting a goal, include a single `Verification contract:` line with plain-text requirements (e.g. "Run npm test (0 failures), grep for remaining STP references"). The contract is extracted, stored on the goal record, and enforced by the `complete_goal` tool — the call is rejected unless the agent provides a non-empty `verificationSummary`. The tool only checks that the summary is present; whether it actually satisfies the contract is cross-checked by the completion auditor.
|
|
16
|
+
- **Per-task verification contracts** — `propose_task_list` supports an optional `verificationContract` per task. If set, `complete_task` requires a non-empty `verificationSummary`.
|
|
17
|
+
- **Both prompt and tool enforcement** — prompts include a VERIFICATION CONTRACT section instructing the agent; tool validators reject calls that violate the contract.
|
|
18
|
+
- **Backward compatible** — goals/tasks without a `Verification contract:` section work exactly as before. No contract = no enforcement.
|
|
19
|
+
- **Auditor integration** — the independent completion auditor receives both the `verificationContract` and `verificationSummary` and cross-checks claims against real artifacts.
|
|
20
|
+
- **`complete_goal` `testResults` removed** — replaced with `verificationSummary`. The old structured test results interface is gone.
|
|
21
|
+
|
|
22
|
+
### Task list system
|
|
23
|
+
|
|
24
|
+
- **Structured task breakdown** — the agent can propose a task list via `propose_task_list`, which shows the user a Confirm / Continue Chatting dialog (mirrors the `propose_goal_draft` pattern). Once confirmed, tasks are displayed in prompts, the widget, serialized to disk, and included in auditor review.
|
|
25
|
+
- **Per-task completion** — `complete_task` marks individual tasks done with optional evidence, and `skip_task` marks tasks as skipped with a required reason. Neither stops the turn, so the agent can continue uninterrupted.
|
|
26
|
+
- **Optional `taskList`** — goals without a task list work exactly as before. The feature is entirely opt-in.
|
|
27
|
+
- **Soft `complete_goal` gate** — when `blockCompletion: true` is set, `complete_goal` surfaces a warning if pending tasks remain (prompt-level only; the agent can still complete).
|
|
28
|
+
|
|
13
29
|
### Goal objective is immutable
|
|
14
30
|
|
|
15
31
|
- The goal objective is immutable — the agent **must not** modify it autonomously. Objective changes are only possible through `propose_goal_tweak`, which presents the user with a Confirm / Continue Chatting dialog matching the `propose_goal_draft` confirmation pattern. This prevents the agent from silently changing the goal contract.
|
|
@@ -23,7 +39,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
|
|
|
23
39
|
### E2e test infrastructure
|
|
24
40
|
|
|
25
41
|
- **Deterministic fork tests using `--mode json`**: the e2e suite spawns a real `pi --fork --mode json` session, parses structured `tool_execution_start`/`tool_execution_end` JSON events for field-level assertions — no free-text AI output parsing. Uses `--append-system-prompt` + `--tools` to force deterministic tool calls.
|
|
26
|
-
- **Full coverage**:
|
|
42
|
+
- **Full coverage**: 205 tests total — function-level integration tests (12), mock-pi handler tests (4), file-validity checks (6), real `pi --fork --mode json` tests (3 scenarios), propose_goal_tweak unit/integration/e2e tests (15), task list policy/round-trip/render tests (50+), and verification contract tests (14).
|
|
27
43
|
|
|
28
44
|
### Completion auditor
|
|
29
45
|
|
|
@@ -158,9 +174,12 @@ The extension exposes tools only when they make sense for the current lifecycle
|
|
|
158
174
|
| `get_goal` | always | Read the focused goal state; mentions other open goals when present |
|
|
159
175
|
| `propose_goal_draft` | drafting only (goal creation) | Submit a concrete draft for user confirmation |
|
|
160
176
|
| `propose_goal_tweak` | tweak drafting only | Submit a revision to an existing goal (shows Confirm / Continue Chatting dialog) |
|
|
161
|
-
| `
|
|
177
|
+
| `complete_goal` | focused active or paused goal | Mark the focused goal complete — supply a `verificationSummary` covering all contract items. When the auditor is disabled, supply `confirmBypassAuditor: true` after user confirmation to bypass the audit |
|
|
162
178
|
| `pause_goal` | focused active goal | Pause the focused goal because of a real blocker |
|
|
163
179
|
| `abort_goal` | focused active or paused goal | Abort/archive an obsolete, impossible, unsafe, or user-cancelled focused goal |
|
|
180
|
+
| `propose_task_list` | active or paused goal | Propose a structured task list for user confirmation (stops the turn) |
|
|
181
|
+
| `complete_task` | active or paused goal | Mark a task complete with optional `verificationSummary`. If the task has a `verificationContract`, the summary is required (does not stop turn) |
|
|
182
|
+
| `skip_task` | active or paused goal | Mark a task skipped with a required reason (does not stop turn) |
|
|
164
183
|
| `propose_goal_tweak` | tweak drafting only | Submit a revision to the focused goal (shows Confirm / Continue Chatting dialog) |
|
|
165
184
|
| `step_complete` | hidden / legacy | Compatibility no-op; Sisyphus no longer requires a step counter |
|
|
166
185
|
| `create_goal` | hidden | Direct calls are rejected; normal creation goes through `propose_goal_draft` |
|
|
@@ -228,7 +247,7 @@ The shipped gates are intentionally small and mechanical.
|
|
|
228
247
|
| Completion auditor gate | Archiving completion unless an independent pi auditor agent returns `<approved/>` |
|
|
229
248
|
| Abort gate | Aborting missing, stale, completed, or reasonless goals |
|
|
230
249
|
| Direct-create rejection | Hidden `create_goal` calls creating goals without the confirmation flow |
|
|
231
|
-
| Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `
|
|
250
|
+
| Post-stop block | Continuing to call tools after `pause_goal`, `abort_goal`, `complete_goal`, or `propose_goal_tweak` stops the turn |
|
|
232
251
|
| Empty-turn guard | Pure chat loops that would keep auto-continuing without meaningful goal work |
|
|
233
252
|
| Abort pause | Active goals staying active after user abort / Ctrl-C |
|
|
234
253
|
| Disk reconciliation | External pause/archive/delete/status changes being ignored or overwritten by stale memory |
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
-> runtime 重新计算 prompt 与 tool surface
|
|
29
29
|
-> 执行 agent 按 focused goal 工作
|
|
30
30
|
-> tool call / turn event 更新 accounting 与 ledger
|
|
31
|
-
-> 执行 agent 调用
|
|
31
|
+
-> 执行 agent 调用 complete_goal 请求完成
|
|
32
32
|
-> 独立 auditor agent 检查完成声明
|
|
33
33
|
-> 只有 auditor approval 才归档为 complete
|
|
34
34
|
```
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
-> 用户确认
|
|
44
44
|
-> 写入 active goal 文件并设置 focus
|
|
45
45
|
-> agent 跨一个或多个 turn 执行工作
|
|
46
|
-
-> agent 调用
|
|
46
|
+
-> agent 调用 complete_goal(status="complete")
|
|
47
47
|
-> 对话中出现 Goal audit started
|
|
48
48
|
-> auditor session 检查真实产物
|
|
49
49
|
-> 对话中出现 Goal audit approved
|
|
@@ -200,7 +200,7 @@ interface GoalConfirmationIntent {
|
|
|
200
200
|
| `goal_question` / `goal_questionnaire` | goal confirmation / tweak drafting 中的结构化用户对话。 |
|
|
201
201
|
| `propose_goal_draft` | 提交 goal 草案给用户确认;没有 confirmation intent 时会被 validator 拒绝。 |
|
|
202
202
|
| `apply_goal_tweak` | 提交并应用 goal 修改。 |
|
|
203
|
-
| `
|
|
203
|
+
| `complete_goal` | 请求完成目标,并触发独立审计。 |
|
|
204
204
|
| `pause_goal` | agent 因真实 blocker 暂停目标。 |
|
|
205
205
|
| `abort_goal` | agent 因目标废弃、不可行、不安全等原因中止目标。 |
|
|
206
206
|
| `step_complete` | 隐藏的 legacy no-op;Sisyphus 不再使用 step counter。 |
|
|
@@ -267,7 +267,7 @@ completion 不信任执行 agent 单方声明,而是一个双 agent 协议。
|
|
|
267
267
|
}
|
|
268
268
|
```
|
|
269
269
|
|
|
270
|
-
`
|
|
270
|
+
`complete_goal` 会先校验 focused goal 是否可以完成,然后写入 `completion_requested` ledger event。
|
|
271
271
|
|
|
272
272
|
### 9.2 对话中出现 audit started
|
|
273
273
|
|
|
@@ -281,7 +281,7 @@ Auditor model: ...
|
|
|
281
281
|
Completion claim: ...
|
|
282
282
|
```
|
|
283
283
|
|
|
284
|
-
这让 audit 成为 transcript 里一个明确的 agentic 阶段,而不是隐藏在 `
|
|
284
|
+
这让 audit 成为 transcript 里一个明确的 agentic 阶段,而不是隐藏在 `complete_goal` tool result 里。
|
|
285
285
|
|
|
286
286
|
### 9.3 独立 auditor session
|
|
287
287
|
|
|
@@ -343,7 +343,7 @@ Audit Report 或 rejection reason
|
|
|
343
343
|
|
|
344
344
|
agent 可以在真实 blocker 下调用 `pause_goal`。用户也可以用 `/goal-pause` 或 abort active run 来暂停目标。
|
|
345
345
|
|
|
346
|
-
`pause_goal`、`abort_goal`、`
|
|
346
|
+
`pause_goal`、`abort_goal`、`complete_goal`、`apply_goal_tweak` 成功后,会设置 `turnStoppedFor`。之后同一个 turn 里,`tool_call` hook 会阻止额外的非允许工具调用。这个 hard gate 仍然保留:生命周期已经 stop 后,agent 应该总结并交还控制,而不是继续修改文件。
|
|
347
347
|
|
|
348
348
|
pause 与 abort 的区别:
|
|
349
349
|
|
|
@@ -391,7 +391,7 @@ Execution runtime
|
|
|
391
391
|
v
|
|
392
392
|
Executor agent
|
|
393
393
|
|-- 正常 read/write/bash/edit 工作
|
|
394
|
-
|-- pause_goal / abort_goal /
|
|
394
|
+
|-- pause_goal / abort_goal / complete_goal
|
|
395
395
|
v
|
|
396
396
|
Completion request
|
|
397
397
|
|-- 对话中出现 Goal audit started
|
|
@@ -131,7 +131,7 @@ The following behaviors remain runtime-enforced:
|
|
|
131
131
|
4. **Mode consistency.** A draft proposal cannot silently change `/goals` into Sisyphus or `/sisyphus` into a regular goal.
|
|
132
132
|
5. **Stale continuation protection.** A queued continuation for an old goal cannot perform work for a different current goal.
|
|
133
133
|
6. **Human-owned focus.** The agent cannot silently switch focus between open goals.
|
|
134
|
-
7. **Completion audit.** `
|
|
134
|
+
7. **Completion audit.** `complete_goal(status="complete")` archives only if the independent auditor returns exactly one approving marker.
|
|
135
135
|
8. **Path safety.** Goal files and archives must remain under expected `.pi/goals` paths.
|
|
136
136
|
9. **Post-stop transaction boundary.** After pause, abort, approved completion, or applied tweak, the same turn should not continue substantive work.
|
|
137
137
|
10. **No hard cost control/cap lifecycle.** Resource-control is outside this runtime; auto-continue uses semantic stop conditions and the empty-turn guard.
|
|
@@ -211,7 +211,7 @@ The runtime keeps tools for irreversible transitions:
|
|
|
211
211
|
|
|
212
212
|
- `propose_goal_draft`
|
|
213
213
|
- `get_goal`
|
|
214
|
-
- `
|
|
214
|
+
- `complete_goal`
|
|
215
215
|
- `pause_goal`
|
|
216
216
|
- `abort_goal`
|
|
217
217
|
- `apply_goal_tweak`
|
package/docs/architecture.md
CHANGED
|
@@ -193,7 +193,7 @@ Continuation prompts include a goal id so stale prompts can be detected and neut
|
|
|
193
193
|
|
|
194
194
|
## Completion output
|
|
195
195
|
|
|
196
|
-
Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `
|
|
196
|
+
Completion is intentionally verbose in the tool result and guarded by an independent auditor agent. `complete_goal(status="complete")` is valid for active and paused goals; paused goals do not need to be resumed just to record completion when existing evidence is sufficient.
|
|
197
197
|
|
|
198
198
|
Before archiving, the tool starts a separate in-memory pi session with a focused auditor prompt. The auditor receives the objective, executor completion summary, and goal metadata, can inspect the workspace with `read`, `grep`, `find`, `ls`, and `bash`, and must end with exactly one marker:
|
|
199
199
|
|
|
@@ -13,7 +13,7 @@ import {
|
|
|
13
13
|
type ExtensionContext,
|
|
14
14
|
type ResourceLoader,
|
|
15
15
|
} from "@earendil-works/pi-coding-agent";
|
|
16
|
-
import type { GoalRecord } from "./goal-record.ts";
|
|
16
|
+
import type { GoalRecord, GoalTaskList } from "./goal-record.ts";
|
|
17
17
|
|
|
18
18
|
export interface GoalAuditorConfig {
|
|
19
19
|
provider?: string;
|
|
@@ -127,22 +127,34 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
|
|
|
127
127
|
return { approved: approved && !disapproved, disapproved };
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
-
export interface
|
|
131
|
-
/**
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
130
|
+
/**
 * Verification evidence handed to the completion auditor: the executor's own
 * account of what it verified, plus the contract it was held to (when the goal
 * declares one).
 */
export interface AuditorVerificationEvidence {
  /** Verification contract attached to the goal, i.e. what had to be verified. Absent when the goal has none. */
  contract?: string;
  /** Executor-written summary of the checks it says it performed. */
  summary: string;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Render a goal's task list as plain text.
 *
 * The first line is a header with the completed/total counts, the skipped
 * count when non-zero, and a task-gate notice when `blockCompletion` is set
 * and at least one task is still pending. Each following line shows one task
 * with a status marker: `[x]` complete, `[~]` skipped, `[ ]` anything else.
 *
 * @param taskList Task list to render; may be missing.
 * @returns The rendered block, or an empty string when there is no task list
 *          or it contains no tasks.
 */
function taskSummaryBlock(taskList?: GoalTaskList | null): string {
  if (!taskList || taskList.tasks.length === 0) return "";

  const { tasks, blockCompletion } = taskList;
  let doneCount = 0;
  let skippedCount = 0;
  let pendingCount = 0;

  // Single pass: tally statuses while producing one row per task.
  const rows = tasks.map((task) => {
    let marker = "[ ]";
    if (task.status === "complete") {
      doneCount += 1;
      marker = "[x]";
    } else if (task.status === "skipped") {
      skippedCount += 1;
      marker = "[~]";
    } else if (task.status === "pending") {
      pendingCount += 1;
    }
    return ` ${marker} ${task.id}: ${task.title}`;
  });

  const skippedNote = skippedCount > 0 ? `, ${skippedCount} skipped` : "";
  const gateNote = blockCompletion && pendingCount > 0 ? " | TASK GATE: pending tasks block completion" : "";
  const header = `Tasks: ${doneCount}/${tasks.length} complete${skippedNote}${gateNote}`;

  return [header, ...rows].join("\n");
}
|
|
140
152
|
|
|
141
153
|
export function buildGoalAuditorPrompt(args: {
|
|
142
154
|
goal: GoalRecord;
|
|
143
155
|
completionSummary?: string | null;
|
|
144
156
|
detailedSummary: string;
|
|
145
|
-
|
|
157
|
+
verificationSummary?: string | null;
|
|
146
158
|
}): string {
|
|
147
159
|
return [
|
|
148
160
|
"You are the independent completion auditor for pi-goal.",
|
|
@@ -168,32 +180,36 @@ export function buildGoalAuditorPrompt(args: {
|
|
|
168
180
|
"Current goal metadata:",
|
|
169
181
|
"<goal_details>",
|
|
170
182
|
args.detailedSummary,
|
|
183
|
+
...(taskSummaryBlock(args.goal.taskList) ? ["", taskSummaryBlock(args.goal.taskList)] : []),
|
|
171
184
|
"</goal_details>",
|
|
172
|
-
...(args.
|
|
185
|
+
...(args.verificationSummary?.trim() ? [
|
|
173
186
|
"",
|
|
174
|
-
"Executor
|
|
175
|
-
"<
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
"
|
|
187
|
+
"Executor verification summary:",
|
|
188
|
+
"<verification_summary>",
|
|
189
|
+
args.verificationSummary.trim(),
|
|
190
|
+
"</verification_summary>",
|
|
191
|
+
] : []),
|
|
192
|
+
...(args.goal.verificationContract?.trim() ? [
|
|
193
|
+
"",
|
|
194
|
+
"Goal verification contract (what the executor was required to verify):",
|
|
195
|
+
"<verification_contract>",
|
|
196
|
+
args.goal.verificationContract.trim(),
|
|
197
|
+
"</verification_contract>",
|
|
182
198
|
] : []),
|
|
183
199
|
"",
|
|
184
200
|
"Audit checklist:",
|
|
185
|
-
...
|
|
186
|
-
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
187
|
-
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
188
|
-
"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
|
|
189
|
-
"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
190
|
-
"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
191
|
-
] : [
|
|
201
|
+
...[
|
|
192
202
|
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
193
203
|
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
204
|
+
...(args.verificationSummary?.trim()
|
|
205
|
+
? ["3. Check the <verification_summary> against real artifacts. If the executor claims to have run tests or searched for references, verify those claims with actual file/shell evidence. The summary is a claim, not proof — cross-check it."]
|
|
206
|
+
: []),
|
|
207
|
+
...(args.goal.verificationContract?.trim()
|
|
208
|
+
? ["4. Verify that the executor has satisfied every item in the <verification_contract>. If any item is missing or weakly addressed, disapprove."]
|
|
209
|
+
: []),
|
|
210
|
+
"5. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
211
|
+
"6. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
212
|
+
],
|
|
197
213
|
"",
|
|
198
214
|
"Progress reporting:",
|
|
199
215
|
"You have the report_auditor_progress tool available to report your progress to the user.",
|
|
@@ -271,7 +287,7 @@ export async function runGoalCompletionAuditor(args: {
|
|
|
271
287
|
goal: GoalRecord;
|
|
272
288
|
completionSummary?: string | null;
|
|
273
289
|
detailedSummary: string;
|
|
274
|
-
|
|
290
|
+
verificationSummary?: string | null;
|
|
275
291
|
signal?: AbortSignal;
|
|
276
292
|
onProgress?: AuditorProgressCallback;
|
|
277
293
|
/**
|
|
@@ -46,6 +46,15 @@ export function buildGoalCompactSummary(goal: GoalRecord, events: GoalLedgerEven
|
|
|
46
46
|
case "goal_completed":
|
|
47
47
|
lines.push(" - completed");
|
|
48
48
|
break;
|
|
49
|
+
case "task_list_set":
|
|
50
|
+
lines.push(` - task list set: ${event.taskCount} tasks${event.blockCompletion ? " (blocking)" : ""}`);
|
|
51
|
+
break;
|
|
52
|
+
case "task_complete":
|
|
53
|
+
lines.push(` - task complete: ${event.taskId}${event.evidence ? ` — ${truncateText(event.evidence, 60)}` : ""}`);
|
|
54
|
+
break;
|
|
55
|
+
case "task_skipped":
|
|
56
|
+
lines.push(` - task skipped: ${event.taskId} — ${truncateText(event.reason, 60)}`);
|
|
57
|
+
break;
|
|
49
58
|
case "goal_aborted":
|
|
50
59
|
lines.push(` - aborted: ${event.reason}`);
|
|
51
60
|
break;
|
package/extensions/goal-draft.ts
CHANGED
|
@@ -26,6 +26,57 @@ export function promptSafeObjective(objective: string): string {
|
|
|
26
26
|
return objective.replace(/<\/?untrusted_objective>/gi, (tag) => tag.replace(/</g, "&lt;").replace(/>/g, "&gt;"));
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
const VERIFICATION_CONTRACT_RE = /^Verification contract:\s*(.+)$/im;

const CONVENTIONAL_SECTION_NAMES = [
  "success criteria",
  "boundaries",
  "constraints",
  "if blocked",
  "if blocked / unclear / failing",
  "don'ts",
  "sisyphus reminder",
  "objective",
  "目标",
  "ordered steps",
  "order rules",
  "steps",
];

/**
 * Extract `Verification contract:` lines from a goal objective and return the
 * cleaned objective (without those lines) together with the contract text.
 *
 * A contract line is a single line that, once trimmed, matches
 * (case-insensitively):
 *   Verification contract: <text>
 *
 * It can appear anywhere in the objective, but by convention it goes after
 * the other sections (like Success criteria, Boundaries, Constraints).
 *
 * Every matching line is removed from the objective. When more than one line
 * matches, their texts are joined with newlines in order of appearance, so no
 * requirement that was stripped from the objective is silently dropped.
 *
 * A bare `Verification contract:` header with no text on the same line is not
 * treated as a contract and stays in the objective.
 *
 * @param objective Raw objective text; `\r` characters are removed before splitting into lines.
 * @returns The objective without contract lines, and `verificationContract`
 *          set to the extracted text or `undefined` when none was found.
 */
export function extractVerificationContract(objective: string): { objective: string; verificationContract?: string } {
  const contractParts: string[] = [];
  const keptLines: string[] = [];

  for (const line of objective.replace(/\r/g, "").split("\n")) {
    const match = VERIFICATION_CONTRACT_RE.exec(line.trim());
    if (!match) {
      keptLines.push(line);
      continue;
    }
    // The line is a contract line: drop it from the objective and keep its text.
    const text = match[1]?.trim();
    if (text) contractParts.push(text);
  }

  const contract = contractParts.join("\n");
  return {
    objective: keptLines.join("\n"),
    verificationContract: contract || undefined,
  };
}
|
|
79
|
+
|
|
29
80
|
export function buildDraftConfirmationText(args: {
|
|
30
81
|
focus: GoalDraftingFocus;
|
|
31
82
|
originalTopic: string;
|
|
@@ -131,6 +182,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
|
|
|
131
182
|
"- If the topic is already concrete, you may proceed directly to propose_goal_draft.",
|
|
132
183
|
"- The goal contract should make the objective, success criteria, boundaries, constraints, and blocker rule explicit.",
|
|
133
184
|
"- Keep grilling assumptions until the objective, success criteria, boundaries, constraints, and blocker rule are clear enough to confirm.",
|
|
185
|
+
"- After a goal is confirmed, you may call propose_task_list on the first continuation turn if the objective naturally decomposes into trackable milestones. Do not add a task list for simple, single-step goals.",
|
|
134
186
|
"- propose_goal_draft opens the user's Confirm / Continue Chatting dialog. Confirm creates and focuses the goal; Continue Chatting means keep refining through normal proposal cycles.",
|
|
135
187
|
"- create_goal is not a shortcut. Direct create_goal calls are rejected so the user keeps explicit say in goal creation.",
|
|
136
188
|
];
|
|
@@ -142,6 +194,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
|
|
|
142
194
|
"Success criteria: <observable evidence the goal is done>",
|
|
143
195
|
"Boundaries: <in scope / out of scope>",
|
|
144
196
|
"Constraints: <hard rules>",
|
|
197
|
+
"Verification contract: <optional — what verification evidence is required before marking complete, e.g. 'Run npm test (0 failures), grep for remaining references, re-read requirements and confirm every item is addressed'>",
|
|
145
198
|
"If blocked: <default = stop and ask the user>",
|
|
146
199
|
"Call propose_goal_draft with sisyphus=false and autoContinue=true unless the user asked otherwise.",
|
|
147
200
|
];
|
|
@@ -154,6 +207,7 @@ export function goalDraftingPrompt(topic: string, focus: GoalDraftingFocus): str
|
|
|
154
207
|
"Success criteria: <observable evidence the whole ordered goal is done>",
|
|
155
208
|
"Boundaries: <in scope / out of scope>",
|
|
156
209
|
"Constraints: <hard rules, files not to touch, etc.>",
|
|
210
|
+
"Verification contract: <optional — what verification evidence is required before marking complete>",
|
|
157
211
|
"Ordered steps: <preserve the user's requested steps and ordering; do not add preflight or reconnaissance steps they did not ask for>",
|
|
158
212
|
"If blocked / unclear / failing: <default = stop and ask the user>",
|
|
159
213
|
"Sisyphus reminder: Work patiently and sequentially. No rushing, no unrequested preflight steps, no improvising around blockers.",
|
|
@@ -16,7 +16,10 @@ export type GoalLedgerEvent =
|
|
|
16
16
|
| { type: "audit_result"; goalId: string; verdict: "approved" | "disapproved" | "error"; report: string; at: string }
|
|
17
17
|
| { type: "audit_skipped"; goalId: string; reason: "disabled" | "user_aborted"; provider?: string; model?: string; thinkingLevel?: string; at: string }
|
|
18
18
|
| { type: "goal_completed"; goalId: string; archivePath?: string; at: string }
|
|
19
|
-
| { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string }
|
|
19
|
+
| { type: "goal_aborted"; goalId: string; reason: string; archivePath?: string; at: string }
|
|
20
|
+
| { type: "task_list_set"; goalId: string; taskCount: number; blockCompletion: boolean; at: string }
|
|
21
|
+
| { type: "task_complete"; goalId: string; taskId: string; evidence?: string; at: string }
|
|
22
|
+
| { type: "task_skipped"; goalId: string; taskId: string; reason: string; at: string };
|
|
20
23
|
|
|
21
24
|
export interface GoalLedgerContext {
|
|
22
25
|
cwd: string;
|
|
@@ -147,6 +150,12 @@ function isValidLedgerEvent(value: unknown): value is GoalLedgerEvent {
|
|
|
147
150
|
return typeof obj.goalId === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
|
|
148
151
|
case "goal_aborted":
|
|
149
152
|
return typeof obj.goalId === "string" && typeof obj.reason === "string" && (obj.archivePath === undefined || typeof obj.archivePath === "string");
|
|
153
|
+
case "task_list_set":
|
|
154
|
+
return typeof obj.goalId === "string" && typeof obj.taskCount === "number" && typeof obj.blockCompletion === "boolean";
|
|
155
|
+
case "task_complete":
|
|
156
|
+
return typeof obj.goalId === "string" && typeof obj.taskId === "string" && (obj.evidence === undefined || typeof obj.evidence === "string");
|
|
157
|
+
case "task_skipped":
|
|
158
|
+
return typeof obj.goalId === "string" && typeof obj.taskId === "string" && typeof obj.reason === "string";
|
|
150
159
|
default:
|
|
151
160
|
return false;
|
|
152
161
|
}
|
|
@@ -176,6 +185,12 @@ function sanitizeEvent(event: GoalLedgerEvent): GoalLedgerEvent {
|
|
|
176
185
|
return { ...event, goalId: safeGoalId(event.goalId) };
|
|
177
186
|
case "goal_aborted":
|
|
178
187
|
return { ...event, goalId: safeGoalId(event.goalId) };
|
|
188
|
+
case "task_list_set":
|
|
189
|
+
return { ...event, goalId: safeGoalId(event.goalId) };
|
|
190
|
+
case "task_complete":
|
|
191
|
+
return { ...event, goalId: safeGoalId(event.goalId) };
|
|
192
|
+
case "task_skipped":
|
|
193
|
+
return { ...event, goalId: safeGoalId(event.goalId) };
|
|
179
194
|
case "goal_unfocused":
|
|
180
195
|
return event;
|
|
181
196
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { statusLabel, type GoalDisplayRecordLike } from "./goal-core.ts";
|
|
2
|
+
import type { GoalTaskList, TaskStatus } from "./goal-record.ts";
|
|
2
3
|
|
|
3
4
|
export type GoalStatusLike = "active" | "paused" | "complete";
|
|
4
5
|
export type StopReasonLike = "user" | "agent";
|
|
@@ -9,6 +10,7 @@ export interface GoalPolicyRecordLike extends GoalDisplayRecordLike {
|
|
|
9
10
|
updatedAt?: string;
|
|
10
11
|
pauseReason?: string;
|
|
11
12
|
pauseSuggestedAction?: string;
|
|
13
|
+
taskList?: GoalTaskList;
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
export type PolicyValidation =
|
|
@@ -39,7 +41,7 @@ export function validateGoalCompletion(args: {
|
|
|
39
41
|
const { goal, runningGoalId } = args;
|
|
40
42
|
if (!goal) return { ok: false, message: "No goal is set." };
|
|
41
43
|
if (runningGoalId && goal.id !== runningGoalId) return { ok: false, message: "The active goal changed during this run; not marking it complete." };
|
|
42
|
-
if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)};
|
|
44
|
+
if (!isCompletableStatus(goal.status)) return { ok: false, message: `Goal is ${statusLabel(goal)}; complete_goal does not apply.` };
|
|
43
45
|
return { ok: true };
|
|
44
46
|
}
|
|
45
47
|
|
|
@@ -124,7 +126,87 @@ export function abortGoalCommandMessage(args: { archived: boolean; wasDrafting:
|
|
|
124
126
|
return args.archived ? "Goal aborted and archived." : args.wasDrafting ? "Drafting cancelled." : "No goal is set.";
|
|
125
127
|
}
|
|
126
128
|
|
|
127
|
-
export function
|
|
129
|
+
/**
 * Build a one-line progress summary for a task list.
 *
 * @param taskList Task list to summarize.
 * @returns `"No tasks"` for an empty list; otherwise `"<complete>/<total> tasks complete"`,
 *          followed by `" (<n> skipped)"` when at least one task was skipped.
 */
export function buildTaskSummary(taskList: GoalTaskList): string {
  const { tasks } = taskList;
  if (tasks.length === 0) return "No tasks";

  let doneCount = 0;
  let skippedCount = 0;
  for (const { status } of tasks) {
    if (status === "complete") doneCount += 1;
    else if (status === "skipped") skippedCount += 1;
  }

  const headline = `${doneCount}/${tasks.length} tasks complete`;
  return skippedCount > 0 ? `${headline} (${skippedCount} skipped)` : headline;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Produce the warning text shown when a task list with `blockCompletion`
 * enabled still has pending tasks.
 *
 * @param taskList Task list to inspect.
 * @returns The warning message, or `null` when `blockCompletion` is off or no
 *          task is pending.
 */
export function taskCompletionBlockWarning(taskList: GoalTaskList): string | null {
  if (!taskList.blockCompletion) return null;

  let pendingCount = 0;
  for (const task of taskList.tasks) {
    if (task.status === "pending") pendingCount += 1;
  }
  if (pendingCount === 0) return null;

  const plural = pendingCount > 1 ? "s" : "";
  return `${pendingCount} task${plural} still pending with blockCompletion enabled. Complete or skip all pending tasks before finishing the goal.`;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Check that a verification summary is present whenever a verification
 * contract exists.
 *
 * Only presence is checked: a contract that is non-empty after trimming
 * requires a summary that is non-empty after trimming. The content of the
 * summary is not compared against the contract here.
 *
 * @param args.verificationContract Contract text, if any.
 * @param args.verificationSummary Summary text supplied with the completion call, if any.
 * @returns `{ ok: true }` when no contract exists or a summary was supplied;
 *          otherwise `{ ok: false, message }`.
 */
export function validateVerificationSummary(args: {
  verificationContract?: string | null;
  verificationSummary?: string | null;
}): PolicyValidation {
  const hasContract = Boolean(args.verificationContract?.trim());
  const hasSummary = Boolean(args.verificationSummary?.trim());

  if (!hasContract || hasSummary) return { ok: true };

  return {
    ok: false,
    message: `This goal has a verification contract but no verificationSummary was provided. Provide a verificationSummary that addresses the contract requirements.`,
  };
}
|
|
164
|
+
|
|
165
|
+
/**
 * Decide whether a task may be marked complete.
 *
 * Rejects, in this order: no goal, goal without a task list, unknown task id,
 * task already complete, task already skipped.
 *
 * @param args.goal Goal that owns the task list, or `null` when none is set.
 * @param args.taskId Id of the task to complete (exact match).
 * @returns `{ ok: true }` when the task can be completed, otherwise
 *          `{ ok: false, message }` describing the first failed check.
 */
export function validateTaskCompletion(args: {
  goal: GoalPolicyRecordLike | null;
  taskId: string;
}): PolicyValidation {
  const { goal, taskId } = args;
  const reject = (message: string): PolicyValidation => ({ ok: false, message });

  if (!goal) return reject("No goal is set.");
  if (!goal.taskList) return reject("Goal has no task list.");

  const target = goal.taskList.tasks.find((candidate) => candidate.id === taskId);
  if (!target) return reject(`Task "${taskId}" not found.`);

  switch (target.status) {
    case "complete":
      return reject(`Task "${taskId}" is already complete.`);
    case "skipped":
      return reject(`Task "${taskId}" was already skipped.`);
    default:
      return { ok: true };
  }
}
|
|
177
|
+
|
|
178
|
+
/**
 * Decide whether a task may be marked skipped.
 *
 * Rejects, in this order: no goal, goal without a task list, unknown task id,
 * task already complete, task already skipped, blank reason.
 *
 * @param args.goal Goal that owns the task list, or `null` when none is set.
 * @param args.taskId Id of the task to skip (exact match).
 * @param args.reason Why the task is being skipped; must be non-empty after trimming.
 * @returns `{ ok: true }` when the task can be skipped, otherwise
 *          `{ ok: false, message }` describing the first failed check.
 */
export function validateTaskSkip(args: {
  goal: GoalPolicyRecordLike | null;
  taskId: string;
  reason: string;
}): PolicyValidation {
  const { goal, taskId, reason } = args;
  const reject = (message: string): PolicyValidation => ({ ok: false, message });

  if (!goal) return reject("No goal is set.");
  if (!goal.taskList) return reject("Goal has no task list.");

  const target = goal.taskList.tasks.find((candidate) => candidate.id === taskId);
  if (!target) return reject(`Task "${taskId}" not found.`);
  if (target.status === "complete") return reject(`Task "${taskId}" is already complete.`);
  if (target.status === "skipped") return reject(`Task "${taskId}" was already skipped.`);

  // The reason is checked last so state-related rejections take precedence.
  if (reason.trim().length === 0) return reject("skip_task requires a non-empty reason.");

  return { ok: true };
}
|
|
192
|
+
|
|
193
|
+
/**
 * Validate a proposed task list before it is attached to a goal.
 *
 * Rejects when there is no goal, when more than 50 tasks are proposed, when a
 * task has an empty (whitespace-only) id or title, or when two tasks share an id.
 *
 * Ids are compared after trimming surrounding whitespace: the emptiness check
 * already trims, and ids that differ only by padding would otherwise pass here
 * yet become indistinguishable once trimmed.
 *
 * @returns `{ ok: true }` when the proposal is acceptable, otherwise `{ ok: false, message }`.
 */
export function validateTaskListProposal(args: {
  goal: GoalPolicyRecordLike | null;
  tasks: { id: string; title: string }[];
}): PolicyValidation {
  if (!args.goal) return { ok: false, message: "No goal is set." };
  if (args.tasks.length > 50) return { ok: false, message: "Task list cannot exceed 50 tasks." };

  const seenIds = new Set<string>();
  for (const task of args.tasks) {
    const id = task.id.trim();
    if (!id) return { ok: false, message: "All tasks must have a non-empty id." };
    if (!task.title.trim()) return { ok: false, message: `Task "${task.id}" must have a non-empty title.` };
    // Dedupe on the trimmed id so "a" and " a " are recognised as the same task.
    if (seenIds.has(id)) return { ok: false, message: `Duplicate task id: "${id}".` };
    seenIds.add(id);
  }
  return { ok: true };
}
|
|
208
|
+
|
|
209
|
+
export function buildCompletionReport(args: { detailedSummary: string; completionSummary?: string | null; auditorReport?: string | null; auditSkippedReason?: string | null; taskSummary?: string | null }): string {
|
|
128
210
|
const auditSkipped = args.auditSkippedReason?.trim();
|
|
129
211
|
const auditorReport = args.auditorReport?.trim();
|
|
130
212
|
const lines = auditSkipped
|
|
@@ -136,6 +218,10 @@ export function buildCompletionReport(args: { detailedSummary: string; completio
|
|
|
136
218
|
if (summary) {
|
|
137
219
|
lines.push("", "Completion summary:", summary);
|
|
138
220
|
}
|
|
221
|
+
const taskSummary = args.taskSummary?.trim();
|
|
222
|
+
if (taskSummary) {
|
|
223
|
+
lines.push("", `Task summary: ${taskSummary}`);
|
|
224
|
+
}
|
|
139
225
|
lines.push("", args.detailedSummary);
|
|
140
226
|
return lines.join("\n");
|
|
141
227
|
}
|
|
@@ -318,7 +318,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
|
|
|
318
318
|
const selected = i === optionIndex;
|
|
319
319
|
const prefix = selected ? theme.fg("accent", "> ") : " ";
|
|
320
320
|
const recTag = !opt.isCustom && q?.recommended === i ? theme.fg("success", " ★") : "";
|
|
321
|
-
|
|
321
|
+
addWrapped(prefix + theme.fg(selected ? "accent" : "text", `${i + 1}. ${opt.label}`) + recTag);
|
|
322
322
|
}
|
|
323
323
|
}
|
|
324
324
|
|
|
@@ -4,6 +4,25 @@ export type GoalEventKind = "checkpoint" | "stale" | "drafting";
|
|
|
4
4
|
export type DraftingFocus = "goal" | "sisyphus";
|
|
5
5
|
export type GoalFocusReason = "created" | "selected" | "resumed" | "completed" | "cleared" | "aborted" | "migrated";
|
|
6
6
|
|
|
7
|
+
/** State of a single task within a goal's task list. */
export type TaskStatus =
  | "pending"
  | "complete"
  | "skipped";
|
|
8
|
+
|
|
9
|
+
/** One entry in a goal's task list. */
export interface GoalTask {
  /** Identifier used to look the task up within its list. */
  id: string;
  /** Human-readable task title. */
  title: string;
  /** Current state of the task. */
  status: TaskStatus;

  /** Set when the task was completed — presumably an ISO timestamp; confirm against the writer. */
  completedAt?: string;
  /** Set when the task was skipped — presumably an ISO timestamp; confirm against the writer. */
  skippedAt?: string;

  /** Free-text evidence recorded for the task (NOTE(review): presumably supplied on completion — confirm). */
  evidence?: string;
  /** Free-text reason recorded for the task (NOTE(review): presumably supplied when skipping — confirm). */
  skipReason?: string;

  /** Optional plain-text verification requirement attached to this task. */
  verificationContract?: string;
}
|
|
19
|
+
|
|
20
|
+
/** The set of tasks attached to a goal, plus list-level metadata. */
export interface GoalTaskList {
  /** Tasks in the list. */
  tasks: GoalTask[];
  /** NOTE(review): presumably whether unfinished tasks block goal completion — confirm against the completion policy. */
  blockCompletion: boolean;
  /** When the list was proposed, stored as a string (presumably an ISO timestamp — confirm). */
  proposedAt: string;
}
|
|
25
|
+
|
|
7
26
|
export interface GoalUsage {
|
|
8
27
|
tokensUsed: number;
|
|
9
28
|
activeSeconds: number;
|
|
@@ -24,6 +43,9 @@ export interface GoalRecord {
|
|
|
24
43
|
// Set by the agent's pause_goal tool. Cleared when the goal becomes active again.
|
|
25
44
|
pauseReason?: string;
|
|
26
45
|
pauseSuggestedAction?: string;
|
|
46
|
+
taskList?: GoalTaskList;
|
|
47
|
+
/** Plain-text description of what verification evidence is required before completing this goal. */
|
|
48
|
+
verificationContract?: string;
|
|
27
49
|
}
|
|
28
50
|
|
|
29
51
|
export interface GoalStateEntry {
|
|
@@ -90,7 +112,13 @@ export function emptyUsage(): GoalUsage {
|
|
|
90
112
|
}
|
|
91
113
|
|
|
92
114
|
/**
 * Return a copy of `goal` whose nested mutable state is independent of the original.
 *
 * Top-level fields are shallow-copied; `usage`, the task list object, and each
 * task are copied individually, so mutating the clone's usage or tasks does
 * not affect the source record.
 *
 * The task list is only copied when the source has one. A goal without a task
 * list therefore produces a clone with the same set of keys as the source,
 * rather than gaining an explicit `taskList: undefined` property.
 */
export function cloneGoal(goal: GoalRecord): GoalRecord {
  const clone: GoalRecord = { ...goal, usage: { ...goal.usage } };
  if (goal.taskList) {
    clone.taskList = {
      ...goal.taskList,
      tasks: goal.taskList.tasks.map((task) => ({ ...task })),
    };
  }
  return clone;
}
|
|
95
123
|
|
|
96
124
|
export function goalFocusDetails(focusedGoalId: string | null, reason: GoalFocusReason): GoalFocusEntry {
|
|
@@ -136,6 +164,38 @@ export function normalizeUsage(value: unknown): GoalUsage {
|
|
|
136
164
|
return { tokensUsed, activeSeconds };
|
|
137
165
|
}
|
|
138
166
|
|
|
167
|
+
/**
 * Convert an untrusted value into a `GoalTaskList`, or `undefined` if it is unusable.
 *
 * Returns `undefined` when `value` is not a record, when its `tasks` field is
 * not an array, or when no entry survives validation. Entries that are not
 * plain objects, or whose trimmed `id` or `title` is empty, are dropped. Any
 * status other than "complete" or "skipped" becomes "pending". Optional fields
 * are kept only when they are strings. `blockCompletion` is true only for a
 * literal `true`, and a missing `proposedAt` falls back to `nowIso()`.
 */
export function normalizeTaskList(value: unknown): GoalTaskList | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;

  const candidates = raw.tasks;
  if (!Array.isArray(candidates)) return undefined;

  const stringOrUndefined = (field: unknown): string | undefined =>
    typeof field === "string" ? field : undefined;

  // Turn one raw entry into a task, or null when it must be discarded.
  const toTask = (entry: unknown): GoalTask | null => {
    if (!entry || typeof entry !== "object" || Array.isArray(entry)) return null;
    const fields = entry as Record<string, unknown>;

    const id = typeof fields.id === "string" ? fields.id.trim() : "";
    const title = typeof fields.title === "string" ? fields.title.trim() : "";
    if (!id || !title) return null;

    let status: TaskStatus = "pending";
    if (fields.status === "complete" || fields.status === "skipped") {
      status = fields.status;
    }

    return {
      id,
      title,
      status,
      completedAt: stringOrUndefined(fields.completedAt),
      skippedAt: stringOrUndefined(fields.skippedAt),
      evidence: stringOrUndefined(fields.evidence),
      skipReason: stringOrUndefined(fields.skipReason),
      verificationContract: stringOrUndefined(fields.verificationContract),
    };
  };

  const tasks: GoalTask[] = [];
  for (const entry of candidates) {
    const task = toTask(entry);
    if (task) tasks.push(task);
  }
  if (tasks.length === 0) return undefined;

  return {
    tasks,
    blockCompletion: raw.blockCompletion === true,
    proposedAt: typeof raw.proposedAt === "string" ? raw.proposedAt : nowIso(),
  };
}
|
|
198
|
+
|
|
139
199
|
export function normalizeGoalRecord(value: unknown): GoalRecord | null {
|
|
140
200
|
const raw = asRecord(value);
|
|
141
201
|
if (!raw) return null;
|
|
@@ -167,5 +227,7 @@ export function normalizeGoalRecord(value: unknown): GoalRecord | null {
|
|
|
167
227
|
stopReason: raw.stopReason === "agent" || raw.stopReason === "user" ? raw.stopReason : undefined,
|
|
168
228
|
pauseReason: typeof raw.pauseReason === "string" && raw.pauseReason.trim() ? raw.pauseReason : undefined,
|
|
169
229
|
pauseSuggestedAction: typeof raw.pauseSuggestedAction === "string" && raw.pauseSuggestedAction.trim() ? raw.pauseSuggestedAction : undefined,
|
|
230
|
+
taskList: normalizeTaskList(raw.taskList),
|
|
231
|
+
verificationContract: typeof raw.verificationContract === "string" ? raw.verificationContract : undefined,
|
|
170
232
|
};
|
|
171
233
|
}
|