pi-goal-x 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/extensions/goal-auditor.ts +36 -4
- package/extensions/goal.ts +10 -0
- package/extensions/prompts/goal-prompts.ts +4 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -35,6 +35,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
|
|
|
35
35
|
- **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
|
|
36
36
|
- **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
|
|
37
37
|
- **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
|
|
38
|
+
- **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
|
|
38
39
|
|
|
39
40
|
### Drafting & UX
|
|
40
41
|
|
|
package/extensions/goal-auditor.ts
CHANGED
|
@@ -127,10 +127,22 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
|
|
|
127
127
|
return { approved: approved && !disapproved, disapproved };
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
export interface AuditorTestResults {
|
|
131
|
+
/** Exit code of the test run (0 = success) */
|
|
132
|
+
exitCode: number;
|
|
133
|
+
/** Test suite name, e.g. 'npm test' */
|
|
134
|
+
suiteName?: string;
|
|
135
|
+
/** Last lines of test output showing results */
|
|
136
|
+
output?: string;
|
|
137
|
+
/** ISO timestamp of when tests were run */
|
|
138
|
+
timestamp?: string;
|
|
139
|
+
}
|
|
140
|
+
|
|
130
141
|
export function buildGoalAuditorPrompt(args: {
|
|
131
142
|
goal: GoalRecord;
|
|
132
143
|
completionSummary?: string | null;
|
|
133
144
|
detailedSummary: string;
|
|
145
|
+
testResults?: AuditorTestResults | null;
|
|
134
146
|
}): string {
|
|
135
147
|
return [
|
|
136
148
|
"You are the independent completion auditor for pi-goal.",
|
|
@@ -157,12 +169,31 @@ export function buildGoalAuditorPrompt(args: {
|
|
|
157
169
|
"<goal_details>",
|
|
158
170
|
args.detailedSummary,
|
|
159
171
|
"</goal_details>",
|
|
172
|
+
...(args.testResults ? [
|
|
173
|
+
"",
|
|
174
|
+
"Executor test evidence:",
|
|
175
|
+
"<test_evidence>",
|
|
176
|
+
` Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
|
|
177
|
+
` Exit code: ${args.testResults.exitCode}`,
|
|
178
|
+
` Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
|
|
179
|
+
` Output:`,
|
|
180
|
+
...(args.testResults.output ? args.testResults.output.split("\n").map((l) => ` ${l}`) : [" (none provided)"]),
|
|
181
|
+
"</test_evidence>",
|
|
182
|
+
] : []),
|
|
160
183
|
"",
|
|
161
184
|
"Audit checklist:",
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
185
|
+
...(args.testResults ? [
|
|
186
|
+
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
187
|
+
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
188
|
+
"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
|
|
189
|
+
"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
190
|
+
"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
191
|
+
] : [
|
|
192
|
+
"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
|
|
193
|
+
"2. Inspect artifacts or command output that can prove or disprove those criteria.",
|
|
194
|
+
"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
|
|
195
|
+
"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
|
|
196
|
+
]),
|
|
166
197
|
"",
|
|
167
198
|
"Progress reporting:",
|
|
168
199
|
"You have the report_auditor_progress tool available to report your progress to the user.",
|
|
@@ -240,6 +271,7 @@ export async function runGoalCompletionAuditor(args: {
|
|
|
240
271
|
goal: GoalRecord;
|
|
241
272
|
completionSummary?: string | null;
|
|
242
273
|
detailedSummary: string;
|
|
274
|
+
testResults?: AuditorTestResults | null;
|
|
243
275
|
signal?: AbortSignal;
|
|
244
276
|
onProgress?: AuditorProgressCallback;
|
|
245
277
|
/**
|
package/extensions/goal.ts
CHANGED
|
@@ -441,6 +441,8 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
441
441
|
active.add(QUESTIONNAIRE_TOOL_NAME);
|
|
442
442
|
} else if (state.goal?.status === "active") {
|
|
443
443
|
for (const name of goalExecutionWorkTools) active.add(name);
|
|
444
|
+
active.add(QUESTION_TOOL_NAME);
|
|
445
|
+
active.add(QUESTIONNAIRE_TOOL_NAME);
|
|
444
446
|
}
|
|
445
447
|
pi.setActiveTools(Array.from(active));
|
|
446
448
|
} catch {}
|
|
@@ -1706,12 +1708,19 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
1706
1708
|
"Do not use update_goal=complete as an escape hatch when you are blocked. If you are blocked, call pause_goal({reason, suggestedAction?}) instead so the user can intervene.",
|
|
1707
1709
|
"For sisyphus goals, do not mark complete until every numbered step has been executed and individually verified against its done criterion.",
|
|
1708
1710
|
"If the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. Use update_goal with updatedObjective to sync the objective before continuing work or before marking the goal complete. This ensures the auditor evaluates against the latest requirements.",
|
|
1711
|
+
"If you have just run the test suite successfully and the tests all pass, include a testResults object with the exit code (0) and relevant output. The auditor will see this evidence and can skip re-running the tests.",
|
|
1709
1712
|
],
|
|
1710
1713
|
parameters: Type.Object({
|
|
1711
1714
|
status: Type.Optional(StringEnum([COMPLETE_STATUS] as const, { description: "Set to complete only when the objective is achieved." })),
|
|
1712
1715
|
completionSummary: Type.Optional(Type.String({ description: "Concise completion claim and evidence summary passed to the independent auditor agent." })),
|
|
1713
1716
|
confirmBypassAuditor: Type.Optional(Type.Boolean({ description: "Set to true to confirm bypassing the independent auditor when it is disabled in settings." })),
|
|
1714
1717
|
updatedObjective: Type.Optional(Type.String({ description: "Revised goal objective. Use when the user's requirements have changed mid-flight. The goal remains active so the agent can continue working toward the new objective. Can be combined with status=complete to update the objective before the completion audit." })),
|
|
1718
|
+
testResults: Type.Optional(Type.Object({
|
|
1719
|
+
exitCode: Type.Number({ description: "Exit code of the test run (0 = success)" }),
|
|
1720
|
+
suiteName: Type.Optional(Type.String({ description: "Test suite name, e.g. 'npm test'" })),
|
|
1721
|
+
output: Type.Optional(Type.String({ description: "Last lines of test output showing results" })),
|
|
1722
|
+
timestamp: Type.Optional(Type.String({ description: "ISO timestamp of when tests were run" })),
|
|
1723
|
+
}, { description: "Structured test evidence passed to the auditor so it can skip redundant test re-runs. If you have just run the test suite successfully, include this so the auditor accepts the results without re-running." })),
|
|
1715
1724
|
}),
|
|
1716
1725
|
executionMode: "sequential",
|
|
1717
1726
|
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
|
|
@@ -1913,6 +1922,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
|
|
|
1913
1922
|
goal: auditTarget,
|
|
1914
1923
|
completionSummary: params.completionSummary,
|
|
1915
1924
|
detailedSummary: detailedSummary(auditTarget),
|
|
1925
|
+
testResults: params.testResults,
|
|
1916
1926
|
signal: auditAbortController.signal,
|
|
1917
1927
|
onProgress: (progress) => {
|
|
1918
1928
|
auditProgress = {
|
|
package/extensions/prompts/goal-prompts.ts
CHANGED
|
@@ -36,6 +36,8 @@ ${untrustedObjectiveBlock(goal)}
|
|
|
36
36
|
|
|
37
37
|
Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.
|
|
38
38
|
|
|
39
|
+
To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.
|
|
40
|
+
|
|
39
41
|
Keep this goal in force until it is actually achieved. Do not pause for confirmation just because a phase, chapter, file, or checklist item is finished. At each natural stopping point, compare every explicit requirement with concrete evidence from the workspace/session. If the objective is complete, call update_goal with status=complete and summarize the evidence; update_goal will launch an independent pi auditor agent and only archive if that auditor returns <approved/>. If it is not complete, choose the next concrete action and do it.
|
|
40
42
|
|
|
41
43
|
The completion auditor is independent and semantic, not a paperwork checklist. It may inspect files and command output, and it will reject scaffold-only, alpha, template, proxy-metric, or weakly verified completions with <disapproved/>.
|
|
@@ -62,6 +64,8 @@ export function continuationPrompt(goal: GoalRecord): string {
|
|
|
62
64
|
"",
|
|
63
65
|
"Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.",
|
|
64
66
|
"",
|
|
67
|
+
"To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.",
|
|
68
|
+
"",
|
|
65
69
|
"Avoid repeating work that is already done. Choose the next concrete action toward the objective.",
|
|
66
70
|
"",
|
|
67
71
|
"Before deciding that the goal is achieved, perform a completion audit against the actual current state:",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-goal-x",
|
|
3
|
-
"version": "0.10.0",
|
|
3
|
+
"version": "0.10.1",
|
|
4
4
|
"description": "Goal mode extension for pi: persistent long-running objectives, /goal-set drafting, Sisyphus prompt style, autoContinue, and an above-editor status overlay. Fork of @capyup/pi-goal.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "pi-goal-x contributors",
|