npm - pi-goal-x - Versions diffs - 0.10.0 → 0.10.1 - Mend

pi-goal-x 0.10.0 → 0.10.1

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5)

package/README.md +1 -0
package/extensions/goal-auditor.ts +36 -4
package/extensions/goal.ts +10 -0
package/extensions/prompts/goal-prompts.ts +4 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -35,6 +35,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
 - **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
 - **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
 - **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
+- **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
 ### Drafting & UX

package/extensions/goal-auditor.ts CHANGED Viewed

@@ -127,10 +127,22 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
 	return { approved: approved && !disapproved, disapproved };
 }
+export interface AuditorTestResults {
+	/** Exit code of the test run (0 = success) */
+	exitCode: number;
+	/** Test suite name, e.g. 'npm test' */
+	suiteName?: string;
+	/** Last lines of test output showing results */
+	output?: string;
+	/** ISO timestamp of when tests were run */
+	timestamp?: string;
+}
 export function buildGoalAuditorPrompt(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
+	testResults?: AuditorTestResults | null;
 }): string {
 	return [
 		"You are the independent completion auditor for pi-goal.",
@@ -157,12 +169,31 @@ export function buildGoalAuditorPrompt(args: {
 		"<goal_details>",
 		args.detailedSummary,
 		"</goal_details>",
+		...(args.testResults ? [
+			"",
+			"Executor test evidence:",
+			"<test_evidence>",
+			`  Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
+			`  Exit code: ${args.testResults.exitCode}`,
+			`  Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
+			`  Output:`,
+			...(args.testResults.output ? args.testResults.output.split("\n").map((l) => `    ${l}`) : ["    (none provided)"]),
+			"</test_evidence>",
+		] : []),
 		"",
 		"Audit checklist:",
-		"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
-		"2. Inspect artifacts or command output that can prove or disprove those criteria.",
-		"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
-		"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		...(args.testResults ? [
+			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
+			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
+			"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
+			"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
+			"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		] : [
+			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
+			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
+			"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
+			"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		]),
 		"",
 		"Progress reporting:",
 		"You have the report_auditor_progress tool available to report your progress to the user.",
@@ -240,6 +271,7 @@ export async function runGoalCompletionAuditor(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
+	testResults?: AuditorTestResults | null;
 	signal?: AbortSignal;
 	onProgress?: AuditorProgressCallback;
 	/**

package/extensions/goal.ts CHANGED Viewed

@@ -441,6 +441,8 @@ export default function goalExtension(pi: ExtensionAPI): void {
 				active.add(QUESTIONNAIRE_TOOL_NAME);
 			} else if (state.goal?.status === "active") {
 				for (const name of goalExecutionWorkTools) active.add(name);
+				active.add(QUESTION_TOOL_NAME);
+				active.add(QUESTIONNAIRE_TOOL_NAME);
 			}
 			pi.setActiveTools(Array.from(active));
 		} catch {}
@@ -1706,12 +1708,19 @@ export default function goalExtension(pi: ExtensionAPI): void {
 			"Do not use update_goal=complete as an escape hatch when you are blocked. If you are blocked, call pause_goal({reason, suggestedAction?}) instead so the user can intervene.",
 			"For sisyphus goals, do not mark complete until every numbered step has been executed and individually verified against its done criterion.",
 			"If the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. Use update_goal with updatedObjective to sync the objective before continuing work or before marking the goal complete. This ensures the auditor evaluates against the latest requirements.",
+			"If you have just run the test suite successfully and the tests all pass, include a testResults object with the exit code (0) and relevant output. The auditor will see this evidence and can skip re-running the tests.",
 		],
 		parameters: Type.Object({
 			status: Type.Optional(StringEnum([COMPLETE_STATUS] as const, { description: "Set to complete only when the objective is achieved." })),
 			completionSummary: Type.Optional(Type.String({ description: "Concise completion claim and evidence summary passed to the independent auditor agent." })),
 			confirmBypassAuditor: Type.Optional(Type.Boolean({ description: "Set to true to confirm bypassing the independent auditor when it is disabled in settings." })),
 			updatedObjective: Type.Optional(Type.String({ description: "Revised goal objective. Use when the user's requirements have changed mid-flight. The goal remains active so the agent can continue working toward the new objective. Can be combined with status=complete to update the objective before the completion audit." })),
+			testResults: Type.Optional(Type.Object({
+				exitCode: Type.Number({ description: "Exit code of the test run (0 = success)" }),
+				suiteName: Type.Optional(Type.String({ description: "Test suite name, e.g. 'npm test'" })),
+				output: Type.Optional(Type.String({ description: "Last lines of test output showing results" })),
+				timestamp: Type.Optional(Type.String({ description: "ISO timestamp of when tests were run" })),
+			}, { description: "Structured test evidence passed to the auditor so it can skip redundant test re-runs. If you have just run the test suite successfully, include this so the auditor accepts the results without re-running." })),
 		}),
 		executionMode: "sequential",
 		async execute(_toolCallId, params, signal, _onUpdate, ctx) {
@@ -1913,6 +1922,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
 				goal: auditTarget,
 				completionSummary: params.completionSummary,
 				detailedSummary: detailedSummary(auditTarget),
+				testResults: params.testResults,
 				signal: auditAbortController.signal,
 				onProgress: (progress) => {
 					auditProgress = {

package/extensions/prompts/goal-prompts.ts CHANGED Viewed

@@ -36,6 +36,8 @@ ${untrustedObjectiveBlock(goal)}
 Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.
+To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.
 Keep this goal in force until it is actually achieved. Do not pause for confirmation just because a phase, chapter, file, or checklist item is finished. At each natural stopping point, compare every explicit requirement with concrete evidence from the workspace/session. If the objective is complete, call update_goal with status=complete and summarize the evidence; update_goal will launch an independent pi auditor agent and only archive if that auditor returns <approved/>. If it is not complete, choose the next concrete action and do it.
 The completion auditor is independent and semantic, not a paperwork checklist. It may inspect files and command output, and it will reject scaffold-only, alpha, template, proxy-metric, or weakly verified completions with <disapproved/>.
@@ -62,6 +64,8 @@ export function continuationPrompt(goal: GoalRecord): string {
 		"",
 		"Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.",
 		"",
+		"To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.",
+		"",
 		"Avoid repeating work that is already done. Choose the next concrete action toward the objective.",
 		"",
 		"Before deciding that the goal is achieved, perform a completion audit against the actual current state:",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-goal-x",
-  "version": "0.10.0",
+  "version": "0.10.1",
   "description": "Goal mode extension for pi: persistent long-running objectives, /goal-set drafting, Sisyphus prompt style, autoContinue, and an above-editor status overlay. Fork of @capyup/pi-goal.",
   "license": "MIT",
   "author": "pi-goal-x contributors",