pi-goal-x 0.10.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/extensions/goal-auditor.ts +36 -4
- package/extensions/goal.ts +14 -51
- package/extensions/prompts/goal-prompts.ts +6 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,10 +10,10 @@ The extension is designed around one rule: **the user owns intent; the agent exe
|
|
|
10
10
|
|
|
11
11
|
All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are preserved. The following changes are specific to pi-goal-x:
|
|
12
12
|
|
|
13
|
-
###
|
|
13
|
+
### Goal objective is immutable
|
|
14
14
|
|
|
15
|
-
-
|
|
16
|
-
- **`apply_goal_tweak`**
|
|
15
|
+
- The goal objective is immutable — the agent **must not** modify it autonomously. Objective changes are only possible through `apply_goal_tweak`, which is gated behind the user-initiated `/goal-tweak` drafting flow. This prevents the agent from silently changing the goal contract.
|
|
16
|
+
- **`apply_goal_tweak`** is the sole mechanism for updating the objective, available exclusively during a `/goal-tweak` drafting interview. If the user's requirements change, they must run `/goal-tweak` to initiate the revision flow.
|
|
17
17
|
|
|
18
18
|
### Deferred archival
|
|
19
19
|
|
|
@@ -35,6 +35,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
|
|
|
35
35
|
- **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
|
|
36
36
|
- **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
|
|
37
37
|
- **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
|
|
38
|
+
- **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
|
|
38
39
|
|
|
39
40
|
### Drafting & UX
|
|
40
41
|
|
package/extensions/goal-auditor.ts
CHANGED
|
@@ -127,10 +127,22 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
|
|
|
127
127
|
return { approved: approved && !disapproved, disapproved };
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
export interface AuditorTestResults {
|
|
131
|
+
/** Exit code of the test run (0 = success) */
|
|
132
|
+
exitCode: number;
|
|
133
|
+
/** Test suite name, e.g. 'npm test' */
|
|
134
|
+
suiteName?: string;
|
|
135
|
+
/** Last lines of test output showing results */
|
|
136
|
+
output?: string;
|
|
137
|
+
/** ISO timestamp of when tests were run */
|
|
138
|
+
timestamp?: string;
|
|
139
|
+
}
|
|
140
|
+
|
|
130
141
|
export function buildGoalAuditorPrompt(args: {
|
|
131
142
|
goal: GoalRecord;
|
|
132
143
|
completionSummary?: string | null;
|
|
133
144
|
detailedSummary: string;
|
|
145
|
+
testResults?: AuditorTestResults | null;
|
|
134
146
|
}): string {
|
|
135
147
|
return [
|
|
136
148
|
"You are the independent completion auditor for pi-goal.",
|
|
@@ -157,12 +169,31 @@ export function buildGoalAuditorPrompt(args: {
|
|
|
157
169
|
"<goal_details>",
|
|
158
170
|
args.detailedSummary,
|
|
159
171
|
"</goal_details>",
|
|
172
|
+
...(args.testResults ? [
|
|
173
|
+
"",
|
|
174
|
+
"Executor test evidence:",
|
|
175
|
+
"<test_evidence>",
|
|
176
|
+
` Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
|
|
177
|
+
` Exit code: ${args.testResults.exitCode}`,
|
|
178
|
+
` Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
|
|
179
|
+
` Output:`,
|
|
180
|
+
...(args.testResults.output ? args.testResults.output.split("\n").map((l) => ` ${l}`) : [" (none provided)"]),
|
|
181
|
+
"</test_evidence>",
|
|
182
|
+
] : []),
|
|
160
183
|
"",
|
|
161
184
|
"Audit checklist:",
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
185
|
+
...(args.testResults ? [
|
|
186
|
+
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
187
|
+
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
188
|
+
"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
|
|
189
|
+
"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
190
|
+
"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
191
|
+
] : [
|
|
192
|
+
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
193
|
+
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
194
|
+
"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
195
|
+
"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
196
|
+
]),
|
|
166
197
|
"",
|
|
167
198
|
"Progress reporting:",
|
|
168
199
|
"You have the report_auditor_progress tool available to report your progress to the user.",
|
|
@@ -240,6 +271,7 @@ export async function runGoalCompletionAuditor(args: {
|
|
|
240
271
|
goal: GoalRecord;
|
|
241
272
|
completionSummary?: string | null;
|
|
242
273
|
detailedSummary: string;
|
|
274
|
+
testResults?: AuditorTestResults | null;
|
|
243
275
|
signal?: AbortSignal;
|
|
244
276
|
onProgress?: AuditorProgressCallback;
|
|
245
277
|
/**
|
package/extensions/goal.ts
CHANGED
|
@@ -106,7 +106,6 @@ import {
|
|
|
106
106
|
shouldInjectPostCompactReminder,
|
|
107
107
|
validateGoalAbort,
|
|
108
108
|
validateGoalCompletion,
|
|
109
|
-
validateGoalUpdate,
|
|
110
109
|
validatePauseGoal,
|
|
111
110
|
validateResumeGoal,
|
|
112
111
|
} from "./goal-policy.ts";
|
|
@@ -441,6 +440,8 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
441
440
|
active.add(QUESTIONNAIRE_TOOL_NAME);
|
|
442
441
|
} else if (state.goal?.status === "active") {
|
|
443
442
|
for (const name of goalExecutionWorkTools) active.add(name);
|
|
443
|
+
active.add(QUESTION_TOOL_NAME);
|
|
444
|
+
active.add(QUESTIONNAIRE_TOOL_NAME);
|
|
444
445
|
}
|
|
445
446
|
pi.setActiveTools(Array.from(active));
|
|
446
447
|
} catch {}
|
|
@@ -1705,66 +1706,27 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
1705
1706
|
"Do not call update_goal merely because work is stopping, substantial progress was made, or tests passed without covering every requirement.",
|
|
1706
1707
|
"Do not use update_goal=complete as an escape hatch when you are blocked. If you are blocked, call pause_goal({reason, suggestedAction?}) instead so the user can intervene.",
|
|
1707
1708
|
"For sisyphus goals, do not mark complete until every numbered step has been executed and individually verified against its done criterion.",
|
|
1708
|
-
"If the user gives requirements, feedback, or corrections that differ from the goal objective, the
|
|
1709
|
+
"The goal objective is immutable. The agent MUST NOT modify the goal objective on its own initiative. If the user gives requirements, feedback, or corrections that differ from the goal objective, ask the user to run /goal-tweak to revise the goal. Use goal_question to confirm when the change is ambiguous.",
|
|
1710
|
+
"If you have just run the test suite successfully and the tests all pass, include a testResults object with the exit code (0) and relevant output. The auditor will see this evidence and can skip re-running the tests.",
|
|
1709
1711
|
],
|
|
1710
1712
|
parameters: Type.Object({
|
|
1711
1713
|
status: Type.Optional(StringEnum([COMPLETE_STATUS] as const, { description: "Set to complete only when the objective is achieved." })),
|
|
1712
1714
|
completionSummary: Type.Optional(Type.String({ description: "Concise completion claim and evidence summary passed to the independent auditor agent." })),
|
|
1713
1715
|
confirmBypassAuditor: Type.Optional(Type.Boolean({ description: "Set to true to confirm bypassing the independent auditor when it is disabled in settings." })),
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
+
|
|
1717
|
+
testResults: Type.Optional(Type.Object({
|
|
1718
|
+
exitCode: Type.Number({ description: "Exit code of the test run (0 = success)" }),
|
|
1719
|
+
suiteName: Type.Optional(Type.String({ description: "Test suite name, e.g. 'npm test'" })),
|
|
1720
|
+
output: Type.Optional(Type.String({ description: "Last lines of test output showing results" })),
|
|
1721
|
+
timestamp: Type.Optional(Type.String({ description: "ISO timestamp of when tests were run" })),
|
|
1722
|
+
}, { description: "Structured test evidence passed to the auditor so it can skip redundant test re-runs. If you have just run the test suite successfully, include this so the auditor accepts the results without re-running." })),
|
|
1723
|
+
}, { additionalProperties: false }),
|
|
1716
1724
|
executionMode: "sequential",
|
|
1717
1725
|
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
|
|
1718
1726
|
reconcileFocusedGoalFromDisk(ctx);
|
|
1719
1727
|
|
|
1720
|
-
// -- Phase 1: Objective update (quick sync) --
|
|
1721
|
-
// Apply updatedObjective before any completion logic so the completion
|
|
1722
|
-
// flow (if status=complete is also set) reads the latest objective.
|
|
1723
|
-
if (params.updatedObjective !== undefined) {
|
|
1724
|
-
const newObjective = params.updatedObjective.trim();
|
|
1725
|
-
if (!newObjective) throw new Error("update_goal requires a non-empty updatedObjective.");
|
|
1726
|
-
const updateGate = validateGoalUpdate({ goal: state.goal });
|
|
1727
|
-
if (!updateGate.ok) {
|
|
1728
|
-
return {
|
|
1729
|
-
content: [{ type: "text", text: updateGate.message }],
|
|
1730
|
-
details: goalDetails(state.goal),
|
|
1731
|
-
};
|
|
1732
|
-
}
|
|
1733
|
-
if (!state.goal) throw new Error("Goal disappeared during objective update.");
|
|
1734
|
-
const next: GoalRecord = {
|
|
1735
|
-
...state.goal,
|
|
1736
|
-
objective: newObjective,
|
|
1737
|
-
updatedAt: nowIso(),
|
|
1738
|
-
};
|
|
1739
|
-
state.goal = writeActiveGoalFile(ctx, next);
|
|
1740
|
-
pi.appendEntry(STATE_ENTRY, goalDetails(state.goal));
|
|
1741
|
-
try {
|
|
1742
|
-
appendGoalEvent(ctx, {
|
|
1743
|
-
type: "goal_tweaked",
|
|
1744
|
-
goalId: state.goal.id,
|
|
1745
|
-
changeSummary: "Objective updated via update_goal",
|
|
1746
|
-
at: state.goal.updatedAt,
|
|
1747
|
-
});
|
|
1748
|
-
} catch {
|
|
1749
|
-
// Ledger append failure should not block update
|
|
1750
|
-
}
|
|
1751
|
-
updateUI(ctx);
|
|
1752
|
-
|
|
1753
|
-
// Quick sync only (no status=complete) — return without terminating
|
|
1754
|
-
if (params.status !== COMPLETE_STATUS) {
|
|
1755
|
-
return {
|
|
1756
|
-
content: [{ type: "text", text: `Goal objective updated.` }],
|
|
1757
|
-
details: goalDetails(state.goal),
|
|
1758
|
-
};
|
|
1759
|
-
}
|
|
1760
|
-
// Fall through: status=complete also set, proceed with completion below
|
|
1761
|
-
}
|
|
1762
|
-
|
|
1763
1728
|
// -- Phase 2: Status validation --
|
|
1764
1729
|
if (params.status !== COMPLETE_STATUS) {
|
|
1765
|
-
if (params.updatedObjective === undefined) {
|
|
1766
|
-
throw new Error("update_goal requires either status=complete or updatedObjective.");
|
|
1767
|
-
}
|
|
1768
1730
|
throw new Error("update_goal requires status=complete when marking a goal complete.");
|
|
1769
1731
|
}
|
|
1770
1732
|
|
|
@@ -1913,6 +1875,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
1913
1875
|
goal: auditTarget,
|
|
1914
1876
|
completionSummary: params.completionSummary,
|
|
1915
1877
|
detailedSummary: detailedSummary(auditTarget),
|
|
1878
|
+
testResults: params.testResults,
|
|
1916
1879
|
signal: auditAbortController.signal,
|
|
1917
1880
|
onProgress: (progress) => {
|
|
1918
1881
|
auditProgress = {
|
|
@@ -2064,7 +2027,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
2064
2027
|
};
|
|
2065
2028
|
},
|
|
2066
2029
|
renderCall(args, theme) {
|
|
2067
|
-
const label = args?.status ??
|
|
2030
|
+
const label = args?.status ?? "";
|
|
2068
2031
|
return new Text(theme.fg("toolTitle", "update_goal ") + theme.fg("success", label), 0, 0);
|
|
2069
2032
|
},
|
|
2070
2033
|
renderResult(result, _options, theme) {
|
package/extensions/prompts/goal-prompts.ts
CHANGED
|
@@ -36,6 +36,8 @@ ${untrustedObjectiveBlock(goal)}
|
|
|
36
36
|
|
|
37
37
|
Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.
|
|
38
38
|
|
|
39
|
+
To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.
|
|
40
|
+
|
|
39
41
|
Keep this goal in force until it is actually achieved. Do not pause for confirmation just because a phase, chapter, file, or checklist item is finished. At each natural stopping point, compare every explicit requirement with concrete evidence from the workspace/session. If the objective is complete, call update_goal with status=complete and summarize the evidence; update_goal will launch an independent pi auditor agent and only archive if that auditor returns <approved/>. If it is not complete, choose the next concrete action and do it.
|
|
40
42
|
|
|
41
43
|
The completion auditor is independent and semantic, not a paperwork checklist. It may inspect files and command output, and it will reject scaffold-only, alpha, template, proxy-metric, or weakly verified completions with <disapproved/>.
|
|
@@ -46,7 +48,7 @@ If the user explicitly asks to abandon/cancel this goal, or the objective is obs
|
|
|
46
48
|
|
|
47
49
|
Do NOT silently invent workarounds, fake completion, or quietly redefine the objective. Do NOT call update_goal=complete to escape a blocker.
|
|
48
50
|
|
|
49
|
-
Goal evolution: if the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale.
|
|
51
|
+
Goal evolution: if the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. The goal objective is immutable — the agent must NOT modify it autonomously. Propose the updated objective concisely and ask the user to run /goal-tweak to revise it. Do NOT mark the goal complete with a stale objective.${sisyphusDisciplineBlock(goal) ? `\n${sisyphusDisciplineBlock(goal)}` : ""}`;
|
|
50
52
|
}
|
|
51
53
|
|
|
52
54
|
export function continuationPrompt(goal: GoalRecord): string {
|
|
@@ -62,6 +64,8 @@ export function continuationPrompt(goal: GoalRecord): string {
|
|
|
62
64
|
"",
|
|
63
65
|
"Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.",
|
|
64
66
|
"",
|
|
67
|
+
"To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.",
|
|
68
|
+
"",
|
|
65
69
|
"Avoid repeating work that is already done. Choose the next concrete action toward the objective.",
|
|
66
70
|
"",
|
|
67
71
|
"Before deciding that the goal is achieved, perform a completion audit against the actual current state:",
|
|
@@ -79,7 +83,7 @@ export function continuationPrompt(goal: GoalRecord): string {
|
|
|
79
83
|
"Do not call update_goal unless the goal is complete enough to survive independent semantic auditing. Do not mark a goal complete merely because work is stopping.",
|
|
80
84
|
"Do not ask the user for confirmation unless there is a real blocker.",
|
|
81
85
|
"",
|
|
82
|
-
"Goal evolution: if the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale.
|
|
86
|
+
"Goal evolution: if the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. The goal objective is immutable — the agent must NOT modify it autonomously. Propose the updated objective concisely and ask the user to run /goal-tweak to revise it. Do NOT mark the goal complete with a stale objective.",
|
|
83
87
|
"",
|
|
84
88
|
"If you hit a real blocker (missing credentials, contradictory spec, file/permission you cannot access, dangerous operation pending user approval, or an unclear Sisyphus-style ordered plan), call pause_goal({reason, suggestedAction?}) and stop. If the user explicitly asks to abandon/cancel, or the objective is obsolete, impossible, or unsafe to continue, call abort_goal({reason}) and stop. Do not silently invent workarounds. Do not fake completion. pause_goal and abort_goal are structured lifecycle exits; update_goal=complete is not an escape hatch for blockers.",
|
|
85
89
|
...(goal.sisyphus ? ["", sisyphusDisciplineBlock(goal)] : []),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-goal-x",
|
|
3
|
-
"version": "0.10.0",
|
|
3
|
+
"version": "0.10.2",
|
|
4
4
|
"description": "Goal mode extension for pi: persistent long-running objectives, /goal-set drafting, Sisyphus prompt style, autoContinue, and an above-editor status overlay. Fork of @capyup/pi-goal.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "pi-goal-x contributors",
|