npm - pi-goal-x - Versions diffs - 0.9.0 → 0.10.1 - Mend

pi-goal-x 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md +2 -1
package/extensions/goal-auditor.ts +130 -16
package/extensions/goal.ts +10 -0
package/extensions/prompts/goal-prompts.ts +4 -0
package/extensions/widgets/goal-widget.ts +48 -5
package/package.json +1 -1

package/README.md CHANGED

@@ -27,7 +27,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
 ### Completion auditor
-- **Live progress widget** — when the auditor runs, the TUI shows a spinner, the current tool being executed, and recent output lines. No more wondering if anything is happening.
+- **Live progress widget** — when the auditor runs, the TUI shows a spinner, a progress bar (`[████░░░░] 40%`), step labels (`Inspecting files...`, `Verifying success criteria...`), the current tool being executed, and recent output lines. No more wondering if anything is happening.
 - **Escape to skip** — press Escape during an audit to abort it and complete the goal immediately. The skip is recorded in the ledger as `audit_skipped` with reason `user_aborted` and auditor model metadata.
 - **Disable the auditor entirely** — set `disabled: true` in `.pi/goal-auditor.json` (or toggle it via `/goal-settings` → `disabled`). The agent can still bypass with user confirmation by passing `confirmBypassAuditor: true` to `update_goal`.
 - **Skipped audits are recorded** — every skip (whether disabled or Escape-aborted) is logged to the ledger with the reason, provider, model, and thinking level for full traceability.
@@ -35,6 +35,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
 - **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
 - **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
 - **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
+- **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
 ### Drafting & UX

package/extensions/goal-auditor.ts CHANGED

@@ -1,10 +1,13 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
+import type { Static } from "@earendil-works/pi-ai";
+import { Type } from "@earendil-works/pi-ai";
 import type { ThinkingLevel } from "@earendil-works/pi-agent-core";
 import type { Model } from "@earendil-works/pi-ai";
 import {
 	createAgentSession,
 	createExtensionRuntime,
+	defineTool,
 	SessionManager,
 	SettingsManager,
 	type ExtensionContext,
@@ -29,9 +32,13 @@ export interface AuditorProgress {
 	/** Recent text output lines from the auditor's assistant messages */
 	recentOutput: string[];
 	/** Phase of the audit */
-	phase: "running" | "tool_executing" | "producing_report" | "done";
+	phase: "running" | "tool_executing" | "producing_report" | "thinking" | "done";
 	/** Elapsed ms since audit started */
 	elapsedMs: number;
+	/** Current step label shown to the user (e.g. "Inspecting files...") */
+	label?: string;
+	/** Completion percentage from 0 to 100 */
+	percentage?: number;
 }
 export type AuditorProgressCallback = (progress: AuditorProgress) => void;
@@ -120,10 +127,22 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
 	return { approved: approved && !disapproved, disapproved };
 }
+export interface AuditorTestResults {
+	/** Exit code of the test run (0 = success) */
+	exitCode: number;
+	/** Test suite name, e.g. 'npm test' */
+	suiteName?: string;
+	/** Last lines of test output showing results */
+	output?: string;
+	/** ISO timestamp of when tests were run */
+	timestamp?: string;
+}
 export function buildGoalAuditorPrompt(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
+	testResults?: AuditorTestResults | null;
 }): string {
 	return [
 		"You are the independent completion auditor for pi-goal.",
@@ -150,15 +169,53 @@ export function buildGoalAuditorPrompt(args: {
 		"<goal_details>",
 		args.detailedSummary,
 		"</goal_details>",
+		...(args.testResults ? [
+			"",
+			"Executor test evidence:",
+			"<test_evidence>",
+			`  Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
+			`  Exit code: ${args.testResults.exitCode}`,
+			`  Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
+			`  Output:`,
+			...(args.testResults.output ? args.testResults.output.split("\n").map((l) => `    ${l}`) : ["    (none provided)"]),
+			"</test_evidence>",
+		] : []),
 		"",
 		"Audit checklist:",
-		"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
-		"2. Inspect artifacts or command output that can prove or disprove those criteria.",
-		"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
-		"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		...(args.testResults ? [
+			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
+			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
+			"3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
+			"4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
+			"5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		] : [
+			"1. Extract the real success criteria from the objective, including quality/reader outcomes.",
+			"2. Inspect artifacts or command output that can prove or disprove those criteria.",
+			"3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
+			"4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
+		]),
+		"",
+		"Progress reporting:",
+		"You have the report_auditor_progress tool available to report your progress to the user.",
+		"Please use it at natural phase boundaries:",
+		"  - When starting: report_auditor_progress(label='Starting audit...', percentage=0)",
+		"  - When beginning file inspection: report_auditor_progress(label='Inspecting files...', percentage=25)",
+		"  - When verifying success criteria: report_auditor_progress(label='Verifying success criteria...', percentage=50)",
+		"  - When evaluating evidence: report_auditor_progress(label='Evaluating evidence...', percentage=75)",
+		"  - When producing final report: report_auditor_progress(label='Producing report...', percentage=90)",
+		"This is purely for user visibility and does not affect the audit outcome.",
 	].join("\n");
 }
+/** Tool name for auditor progress reporting */
+export const REPORT_AUDITOR_PROGRESS_TOOL_NAME = "report_auditor_progress";
+/** Parameters for the report_auditor_progress tool */
+export const reportAuditorProgressParams = Type.Object({
+	label: Type.String({ description: "Current step label describing what the auditor is doing (e.g. 'Inspecting files...', 'Verifying success criteria...', 'Producing report...')" }),
+	percentage: Type.Number({ description: "Completion percentage from 0 to 100", minimum: 0, maximum: 100 }),
+});
 function makeAuditorResourceLoader(): ResourceLoader {
 	return {
 		getExtensions: () => ({ extensions: [], errors: [], runtime: createExtensionRuntime() }),
@@ -170,9 +227,14 @@ function makeAuditorResourceLoader(): ResourceLoader {
 			"You are a read-only completion auditor running in an isolated pi agent session.",
 			"Inspect the repository and decide whether the claimed goal completion is genuinely satisfied.",
 			"Never modify files. Never approve unless the actual user objective is complete.",
+			"",
+			"You have the report_auditor_progress tool available. Use it to report your audit progress",
+			"to the user at natural phase boundaries (starting, inspecting files, verifying criteria,",
+			"producing report). This helps the user understand what the auditor is doing and how far",
+			"along it is.",
 		].join("\n"),
 		getAppendSystemPrompt: () => [],
-		extendResources: () => {},
+	extendResources: () => {},
 		reload: async () => {},
 	};
 }
@@ -209,6 +271,7 @@ export async function runGoalCompletionAuditor(args: {
 	goal: GoalRecord;
 	completionSummary?: string | null;
 	detailedSummary: string;
+	testResults?: AuditorTestResults | null;
 	signal?: AbortSignal;
 	onProgress?: AuditorProgressCallback;
 	/**
@@ -228,16 +291,6 @@ export async function runGoalCompletionAuditor(args: {
 	}
 	try {
 		const createSession = args.createSession ?? createAgentSession;
-		const { session } = await createSession({
-			cwd: args.ctx.cwd,
-			model,
-			thinkingLevel,
-			modelRegistry: args.ctx.modelRegistry,
-			resourceLoader: makeAuditorResourceLoader(),
-			sessionManager: SessionManager.inMemory(args.ctx.cwd),
-			settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }),
-			tools: ["read", "grep", "find", "ls", "bash"],
-		});
 		const startedAt = Date.now();
 		const progress: AuditorProgress = {
 			recentOutput: [],
@@ -248,6 +301,49 @@ export async function runGoalCompletionAuditor(args: {
 			progress.elapsedMs = Date.now() - startedAt;
 			args.onProgress?.({ ...progress });
 		}
+		// Build the report_auditor_progress tool, capturing the progress state
+		const reportProgressTool = defineTool({
+			name: REPORT_AUDITOR_PROGRESS_TOOL_NAME,
+			label: "Report Auditor Progress",
+			description: "Report current progress of the audit to the user. Call this at natural phase boundaries (starting, inspecting files, verifying criteria, producing report) to keep the user informed.",
+			promptSnippet: "Report current audit progress (step label and completion percentage) to the user.",
+			promptGuidelines: [
+				"Use report_auditor_progress at natural phase boundaries during the audit:",
+				"  - When starting the audit: label='Starting audit...' percentage=0",
+				"  - When beginning file inspection: label='Inspecting files...' percentage=25",
+				"  - When verifying success criteria: label='Verifying success criteria...' percentage=50",
+				"  - When evaluating evidence: label='Evaluating evidence...' percentage=75",
+				"  - When producing final report: label='Producing report...' percentage=90",
+				"This is purely for user visibility — it does not affect the audit outcome.",
+				"Do not call this tool more than once every few seconds to avoid flooding.",
+			],
+			parameters: reportAuditorProgressParams,
+			executionMode: "sequential",
+			async execute(_toolCallId, params) {
+				const { label, percentage } = params as Static<typeof reportAuditorProgressParams>;
+				progress.label = label;
+				progress.percentage = percentage;
+				progress.phase = "running";
+				emitProgress();
+				return {
+					content: [{ type: "text", text: `Progress reported: ${label} (${percentage}%)` }],
+					details: {},
+				};
+			},
+		});
+		const { session } = await createSession({
+			cwd: args.ctx.cwd,
+			model,
+			thinkingLevel,
+			modelRegistry: args.ctx.modelRegistry,
+			resourceLoader: makeAuditorResourceLoader(),
+			sessionManager: SessionManager.inMemory(args.ctx.cwd),
+			settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }),
+			tools: ["read", "grep", "find", "ls", "bash", REPORT_AUDITOR_PROGRESS_TOOL_NAME],
+			customTools: [reportProgressTool],
+		});
 		const unsubscribe = session.subscribe((event) => {
 			if (event.type === "tool_execution_start") {
 				progress.currentTool = event.toolName;
@@ -268,6 +364,20 @@ export async function runGoalCompletionAuditor(args: {
 				return;
 			}
 			if (event.type === "message_update") {
+				// Check for thinking events from the assistant stream
+				const streamEvent = (event as any).assistantMessageEvent;
+				if (streamEvent?.type === "thinking_start") {
+					progress.phase = "thinking";
+					if (!progress.label) progress.label = "Analyzing goal...";
+					emitProgress();
+					return;
+				}
+				if (streamEvent?.type === "thinking_end") {
+					progress.phase = "running";
+					emitProgress();
+					return;
+				}
+				// For text content, show producing_report phase
 				progress.phase = "producing_report";
 				const message = event.message as any;
 				if (message?.role === "assistant") {
@@ -300,6 +410,8 @@ export async function runGoalCompletionAuditor(args: {
 		args.signal?.addEventListener("abort", abortSession, { once: true });
 		// Emit initial progress
+		progress.label = "Starting audit...";
+		progress.percentage = 0;
 		emitProgress();
 		try {
 			if (args.signal?.aborted) return { approved: false, disapproved: true, output: "", model: modelLabel(model), thinkingLevel, error: "Auditor aborted." };
@@ -307,6 +419,8 @@ export async function runGoalCompletionAuditor(args: {
 		} finally {
 			args.signal?.removeEventListener("abort", abortSession);
 			progress.phase = "done";
+			progress.label = "Audit complete.";
+			progress.percentage = 100;
 			emitProgress();
 			unsubscribe();
 		}

package/extensions/goal.ts CHANGED

@@ -441,6 +441,8 @@ export default function goalExtension(pi: ExtensionAPI): void {
 				active.add(QUESTIONNAIRE_TOOL_NAME);
 			} else if (state.goal?.status === "active") {
 				for (const name of goalExecutionWorkTools) active.add(name);
+				active.add(QUESTION_TOOL_NAME);
+				active.add(QUESTIONNAIRE_TOOL_NAME);
 			}
 			pi.setActiveTools(Array.from(active));
 		} catch {}
@@ -1706,12 +1708,19 @@ export default function goalExtension(pi: ExtensionAPI): void {
 			"Do not use update_goal=complete as an escape hatch when you are blocked. If you are blocked, call pause_goal({reason, suggestedAction?}) instead so the user can intervene.",
 			"For sisyphus goals, do not mark complete until every numbered step has been executed and individually verified against its done criterion.",
 			"If the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. Use update_goal with updatedObjective to sync the objective before continuing work or before marking the goal complete. This ensures the auditor evaluates against the latest requirements.",
+			"If you have just run the test suite successfully and the tests all pass, include a testResults object with the exit code (0) and relevant output. The auditor will see this evidence and can skip re-running the tests.",
 		],
 		parameters: Type.Object({
 			status: Type.Optional(StringEnum([COMPLETE_STATUS] as const, { description: "Set to complete only when the objective is achieved." })),
 			completionSummary: Type.Optional(Type.String({ description: "Concise completion claim and evidence summary passed to the independent auditor agent." })),
 			confirmBypassAuditor: Type.Optional(Type.Boolean({ description: "Set to true to confirm bypassing the independent auditor when it is disabled in settings." })),
 			updatedObjective: Type.Optional(Type.String({ description: "Revised goal objective. Use when the user's requirements have changed mid-flight. The goal remains active so the agent can continue working toward the new objective. Can be combined with status=complete to update the objective before the completion audit." })),
+			testResults: Type.Optional(Type.Object({
+				exitCode: Type.Number({ description: "Exit code of the test run (0 = success)" }),
+				suiteName: Type.Optional(Type.String({ description: "Test suite name, e.g. 'npm test'" })),
+				output: Type.Optional(Type.String({ description: "Last lines of test output showing results" })),
+				timestamp: Type.Optional(Type.String({ description: "ISO timestamp of when tests were run" })),
+			}, { description: "Structured test evidence passed to the auditor so it can skip redundant test re-runs. If you have just run the test suite successfully, include this so the auditor accepts the results without re-running." })),
 		}),
 		executionMode: "sequential",
 		async execute(_toolCallId, params, signal, _onUpdate, ctx) {
@@ -1913,6 +1922,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
 				goal: auditTarget,
 				completionSummary: params.completionSummary,
 				detailedSummary: detailedSummary(auditTarget),
+				testResults: params.testResults,
 				signal: auditAbortController.signal,
 				onProgress: (progress) => {
 					auditProgress = {

package/extensions/prompts/goal-prompts.ts CHANGED

@@ -36,6 +36,8 @@ ${untrustedObjectiveBlock(goal)}
 Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.
+To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.
 Keep this goal in force until it is actually achieved. Do not pause for confirmation just because a phase, chapter, file, or checklist item is finished. At each natural stopping point, compare every explicit requirement with concrete evidence from the workspace/session. If the objective is complete, call update_goal with status=complete and summarize the evidence; update_goal will launch an independent pi auditor agent and only archive if that auditor returns <approved/>. If it is not complete, choose the next concrete action and do it.
 The completion auditor is independent and semantic, not a paperwork checklist. It may inspect files and command output, and it will reject scaffold-only, alpha, template, proxy-metric, or weakly verified completions with <disapproved/>.
@@ -62,6 +64,8 @@ export function continuationPrompt(goal: GoalRecord): string {
 		"",
 		"Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.",
 		"",
+		"To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.",
+		"",
 		"Avoid repeating work that is already done. Choose the next concrete action toward the objective.",
 		"",
 		"Before deciding that the goal is achieved, perform a completion audit against the actual current state:",

package/extensions/widgets/goal-widget.ts CHANGED

@@ -24,8 +24,12 @@ export interface AuditorWidgetProgress {
 	currentToolArgs?: string;
 	currentToolStartedAt?: number;
 	recentOutput: string[];
-	phase: "running" | "tool_executing" | "producing_report" | "done";
+	phase: "running" | "tool_executing" | "producing_report" | "thinking" | "done";
 	elapsedMs: number;
+	/** Current step label shown to the user */
+	label?: string;
+	/** Completion percentage from 0 to 100 */
+	percentage?: number;
 }
 export interface GoalWidgetOptions {
@@ -52,6 +56,13 @@ function branchLine(theme: Theme, width: number, isLast: boolean, content: strin
 	return fit(`${theme.fg("dim", prefix)} ${content}`, width);
 }
+function progressBar(pct: number, barWidth: number, theme: Theme): string {
+	const safeBar = Math.max(3, barWidth);
+	const filled = Math.min(safeBar, Math.max(0, Math.round((pct / 100) * safeBar)));
+	const empty = safeBar - filled;
+	return `[${theme.fg("accent", "█".repeat(filled))}${theme.fg("dim", "░".repeat(empty))}]`;
+}
 function displayIcon(goal: GoalWidgetRecord): { icon: string; color: GoalWidgetColor; label: string } {
 	if (goal.status === "complete") return { icon: "✓", color: "success", label: "complete" };
 	if (goal.status === "paused") {
@@ -81,8 +92,17 @@ function spinnerFrame(): string {
 export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme: Theme, width: number): string[] {
 	const safeWidth = Math.max(1, width);
 	const isActive = progress.phase !== "done";
-	const icon = isActive ? theme.fg("accent", spinnerFrame()) : theme.fg("success", "✓");
-	const label = isActive ? "auditing" : "audit complete";
+	const isThinking = progress.phase === "thinking";
+	const icon = isActive
+		? isThinking
+			? theme.fg("muted", "⟡")
+			: theme.fg("accent", spinnerFrame())
+		: theme.fg("success", "✓");
+	const label = isActive
+		? isThinking
+			? "thinking..."
+			: "auditing"
+		: "audit complete";
 	// formatDuration expects seconds, progress.elapsedMs is in milliseconds
 	const duration = formatDuration(Math.floor(progress.elapsedMs / 1000));
 	const lines: string[] = [
@@ -94,7 +114,30 @@ export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme:
 		),
 	];
-	if (isActive && progress.currentTool) {
+	// Show step label when available
+	if (progress.label) {
+		lines.push(branchLine(
+			theme,
+			safeWidth,
+			false,
+			`${theme.fg("text", truncateText(progress.label, Math.max(8, safeWidth - 6)))}`,
+		));
+	}
+	// Show progress bar when percentage is available
+	if (typeof progress.percentage === "number") {
+		const barWidth = Math.max(6, Math.min(safeWidth - 10, 30));
+		const bar = progressBar(progress.percentage, barWidth, theme);
+		const pct = `${theme.fg("muted", `${Math.round(progress.percentage)}%`)}`;
+		lines.push(branchLine(
+			theme,
+			safeWidth,
+			isActive && !progress.currentTool && progress.recentOutput.length === 0 && !isThinking,
+			`${bar} ${pct}`,
+		));
+	}
+	if (isActive && !isThinking && progress.currentTool) {
 		const argText = progress.currentToolArgs
 			? truncateText(progress.currentToolArgs, Math.max(10, safeWidth - 24))
 			: "";
@@ -129,7 +172,7 @@ export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme:
 	}
 	// Show skip hint when audit is actively running
-	if (isActive) {
+	if (isActive && !isThinking) {
 		lines.push(branchLine(
 			theme,
 			safeWidth,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "pi-goal-x",
-  "version": "0.9.0",
+  "version": "0.10.1",
   "description": "Goal mode extension for pi: persistent long-running objectives, /goal-set drafting, Sisyphus prompt style, autoContinue, and an above-editor status overlay. Fork of @capyup/pi-goal.",
   "license": "MIT",
   "author": "pi-goal-x contributors",