pi-goal-x 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
27
27
 
28
28
  ### Completion auditor
29
29
 
30
- - **Live progress widget** — when the auditor runs, the TUI shows a spinner, the current tool being executed, and recent output lines. No more wondering if anything is happening.
30
+ - **Live progress widget** — when the auditor runs, the TUI shows a spinner, a progress bar (`[████░░░░] 40%`), step labels (`Inspecting files...`, `Verifying success criteria...`), the current tool being executed, and recent output lines. No more wondering if anything is happening.
31
31
  - **Escape to skip** — press Escape during an audit to abort it and complete the goal immediately. The skip is recorded in the ledger as `audit_skipped` with reason `user_aborted` and auditor model metadata.
32
32
  - **Disable the auditor entirely** — set `disabled: true` in `.pi/goal-auditor.json` (or toggle it via `/goal-settings` → `disabled`). The agent can still bypass with user confirmation by passing `confirmBypassAuditor: true` to `update_goal`.
33
33
  - **Skipped audits are recorded** — every skip (whether disabled or Escape-aborted) is logged to the ledger with the reason, provider, model, and thinking level for full traceability.
@@ -35,6 +35,7 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
35
35
  - **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
36
36
  - **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
37
37
  - **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
38
+ - **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
38
39
 
39
40
  ### Drafting & UX
40
41
 
@@ -1,10 +1,13 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
+ import type { Static } from "@earendil-works/pi-ai";
4
+ import { Type } from "@earendil-works/pi-ai";
3
5
  import type { ThinkingLevel } from "@earendil-works/pi-agent-core";
4
6
  import type { Model } from "@earendil-works/pi-ai";
5
7
  import {
6
8
  createAgentSession,
7
9
  createExtensionRuntime,
10
+ defineTool,
8
11
  SessionManager,
9
12
  SettingsManager,
10
13
  type ExtensionContext,
@@ -29,9 +32,13 @@ export interface AuditorProgress {
29
32
  /** Recent text output lines from the auditor's assistant messages */
30
33
  recentOutput: string[];
31
34
  /** Phase of the audit */
32
- phase: "running" | "tool_executing" | "producing_report" | "done";
35
+ phase: "running" | "tool_executing" | "producing_report" | "thinking" | "done";
33
36
  /** Elapsed ms since audit started */
34
37
  elapsedMs: number;
38
+ /** Current step label shown to the user (e.g. "Inspecting files...") */
39
+ label?: string;
40
+ /** Completion percentage from 0 to 100 */
41
+ percentage?: number;
35
42
  }
36
43
 
37
44
  export type AuditorProgressCallback = (progress: AuditorProgress) => void;
@@ -120,10 +127,22 @@ export function parseAuditorDecision(output: string): { approved: boolean; disap
120
127
  return { approved: approved && !disapproved, disapproved };
121
128
  }
122
129
 
130
+ export interface AuditorTestResults {
131
+ /** Exit code of the test run (0 = success) */
132
+ exitCode: number;
133
+ /** Test suite name, e.g. 'npm test' */
134
+ suiteName?: string;
135
+ /** Last lines of test output showing results */
136
+ output?: string;
137
+ /** ISO timestamp of when tests were run */
138
+ timestamp?: string;
139
+ }
140
+
123
141
  export function buildGoalAuditorPrompt(args: {
124
142
  goal: GoalRecord;
125
143
  completionSummary?: string | null;
126
144
  detailedSummary: string;
145
+ testResults?: AuditorTestResults | null;
127
146
  }): string {
128
147
  return [
129
148
  "You are the independent completion auditor for pi-goal.",
@@ -150,15 +169,53 @@ export function buildGoalAuditorPrompt(args: {
150
169
  "<goal_details>",
151
170
  args.detailedSummary,
152
171
  "</goal_details>",
172
+ ...(args.testResults ? [
173
+ "",
174
+ "Executor test evidence:",
175
+ "<test_evidence>",
176
+ ` Suite: ${args.testResults.suiteName ?? "(not specified)"}`,
177
+ ` Exit code: ${args.testResults.exitCode}`,
178
+ ` Timestamp: ${args.testResults.timestamp ?? "(not specified)"}`,
179
+ ` Output:`,
180
+ ...(args.testResults.output ? args.testResults.output.split("\n").map((l) => ` ${l}`) : [" (none provided)"]),
181
+ "</test_evidence>",
182
+ ] : []),
153
183
  "",
154
184
  "Audit checklist:",
155
- "1. Extract the real success criteria from the objective, including quality/reader outcomes.",
156
- "2. Inspect artifacts or command output that can prove or disprove those criteria.",
157
- "3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
158
- "4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
185
+ ...(args.testResults ? [
186
+ "1. Extract the real success criteria from the objective, including quality/reader outcomes.",
187
+ "2. Inspect artifacts or command output that can prove or disprove those criteria.",
188
+ "3. Before running a test suite with bash, check the <test_evidence> block. If the executor has provided recent passing test results for that suite, accept them as evidence rather than re-running the tests.",
189
+ "4. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
190
+ "5. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
191
+ ] : [
192
+ "1. Extract the real success criteria from the objective, including quality/reader outcomes.",
193
+ "2. Inspect artifacts or command output that can prove or disprove those criteria.",
194
+ "3. Explain missing or weak evidence, especially scaffold-vs-final quality gaps.",
195
+ "4. End with exactly <approved/> only if the objective is truly complete; otherwise end with exactly <disapproved/>.",
196
+ ]),
197
+ "",
198
+ "Progress reporting:",
199
+ "You have the report_auditor_progress tool available to report your progress to the user.",
200
+ "Please use it at natural phase boundaries:",
201
+ " - When starting: report_auditor_progress(label='Starting audit...', percentage=0)",
202
+ " - When beginning file inspection: report_auditor_progress(label='Inspecting files...', percentage=25)",
203
+ " - When verifying success criteria: report_auditor_progress(label='Verifying success criteria...', percentage=50)",
204
+ " - When evaluating evidence: report_auditor_progress(label='Evaluating evidence...', percentage=75)",
205
+ " - When producing final report: report_auditor_progress(label='Producing report...', percentage=90)",
206
+ "This is purely for user visibility and does not affect the audit outcome.",
159
207
  ].join("\n");
160
208
  }
161
209
 
210
+ /** Tool name for auditor progress reporting */
211
+ export const REPORT_AUDITOR_PROGRESS_TOOL_NAME = "report_auditor_progress";
212
+
213
+ /** Parameters for the report_auditor_progress tool */
214
+ export const reportAuditorProgressParams = Type.Object({
215
+ label: Type.String({ description: "Current step label describing what the auditor is doing (e.g. 'Inspecting files...', 'Verifying success criteria...', 'Producing report...')" }),
216
+ percentage: Type.Number({ description: "Completion percentage from 0 to 100", minimum: 0, maximum: 100 }),
217
+ });
218
+
162
219
  function makeAuditorResourceLoader(): ResourceLoader {
163
220
  return {
164
221
  getExtensions: () => ({ extensions: [], errors: [], runtime: createExtensionRuntime() }),
@@ -170,9 +227,14 @@ function makeAuditorResourceLoader(): ResourceLoader {
170
227
  "You are a read-only completion auditor running in an isolated pi agent session.",
171
228
  "Inspect the repository and decide whether the claimed goal completion is genuinely satisfied.",
172
229
  "Never modify files. Never approve unless the actual user objective is complete.",
230
+ "",
231
+ "You have the report_auditor_progress tool available. Use it to report your audit progress",
232
+ "to the user at natural phase boundaries (starting, inspecting files, verifying criteria,",
233
+ "producing report). This helps the user understand what the auditor is doing and how far",
234
+ "along it is.",
173
235
  ].join("\n"),
174
236
  getAppendSystemPrompt: () => [],
175
- extendResources: () => {},
237
+ extendResources: () => {},
176
238
  reload: async () => {},
177
239
  };
178
240
  }
@@ -209,6 +271,7 @@ export async function runGoalCompletionAuditor(args: {
209
271
  goal: GoalRecord;
210
272
  completionSummary?: string | null;
211
273
  detailedSummary: string;
274
+ testResults?: AuditorTestResults | null;
212
275
  signal?: AbortSignal;
213
276
  onProgress?: AuditorProgressCallback;
214
277
  /**
@@ -228,16 +291,6 @@ export async function runGoalCompletionAuditor(args: {
228
291
  }
229
292
  try {
230
293
  const createSession = args.createSession ?? createAgentSession;
231
- const { session } = await createSession({
232
- cwd: args.ctx.cwd,
233
- model,
234
- thinkingLevel,
235
- modelRegistry: args.ctx.modelRegistry,
236
- resourceLoader: makeAuditorResourceLoader(),
237
- sessionManager: SessionManager.inMemory(args.ctx.cwd),
238
- settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }),
239
- tools: ["read", "grep", "find", "ls", "bash"],
240
- });
241
294
  const startedAt = Date.now();
242
295
  const progress: AuditorProgress = {
243
296
  recentOutput: [],
@@ -248,6 +301,49 @@ export async function runGoalCompletionAuditor(args: {
248
301
  progress.elapsedMs = Date.now() - startedAt;
249
302
  args.onProgress?.({ ...progress });
250
303
  }
304
+
305
+ // Build the report_auditor_progress tool, capturing the progress state
306
+ const reportProgressTool = defineTool({
307
+ name: REPORT_AUDITOR_PROGRESS_TOOL_NAME,
308
+ label: "Report Auditor Progress",
309
+ description: "Report current progress of the audit to the user. Call this at natural phase boundaries (starting, inspecting files, verifying criteria, producing report) to keep the user informed.",
310
+ promptSnippet: "Report current audit progress (step label and completion percentage) to the user.",
311
+ promptGuidelines: [
312
+ "Use report_auditor_progress at natural phase boundaries during the audit:",
313
+ " - When starting the audit: label='Starting audit...' percentage=0",
314
+ " - When beginning file inspection: label='Inspecting files...' percentage=25",
315
+ " - When verifying success criteria: label='Verifying success criteria...' percentage=50",
316
+ " - When evaluating evidence: label='Evaluating evidence...' percentage=75",
317
+ " - When producing final report: label='Producing report...' percentage=90",
318
+ "This is purely for user visibility — it does not affect the audit outcome.",
319
+ "Do not call this tool more than once every few seconds to avoid flooding.",
320
+ ],
321
+ parameters: reportAuditorProgressParams,
322
+ executionMode: "sequential",
323
+ async execute(_toolCallId, params) {
324
+ const { label, percentage } = params as Static<typeof reportAuditorProgressParams>;
325
+ progress.label = label;
326
+ progress.percentage = percentage;
327
+ progress.phase = "running";
328
+ emitProgress();
329
+ return {
330
+ content: [{ type: "text", text: `Progress reported: ${label} (${percentage}%)` }],
331
+ details: {},
332
+ };
333
+ },
334
+ });
335
+
336
+ const { session } = await createSession({
337
+ cwd: args.ctx.cwd,
338
+ model,
339
+ thinkingLevel,
340
+ modelRegistry: args.ctx.modelRegistry,
341
+ resourceLoader: makeAuditorResourceLoader(),
342
+ sessionManager: SessionManager.inMemory(args.ctx.cwd),
343
+ settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }),
344
+ tools: ["read", "grep", "find", "ls", "bash", REPORT_AUDITOR_PROGRESS_TOOL_NAME],
345
+ customTools: [reportProgressTool],
346
+ });
251
347
  const unsubscribe = session.subscribe((event) => {
252
348
  if (event.type === "tool_execution_start") {
253
349
  progress.currentTool = event.toolName;
@@ -268,6 +364,20 @@ export async function runGoalCompletionAuditor(args: {
268
364
  return;
269
365
  }
270
366
  if (event.type === "message_update") {
367
+ // Check for thinking events from the assistant stream
368
+ const streamEvent = (event as any).assistantMessageEvent;
369
+ if (streamEvent?.type === "thinking_start") {
370
+ progress.phase = "thinking";
371
+ if (!progress.label) progress.label = "Analyzing goal...";
372
+ emitProgress();
373
+ return;
374
+ }
375
+ if (streamEvent?.type === "thinking_end") {
376
+ progress.phase = "running";
377
+ emitProgress();
378
+ return;
379
+ }
380
+ // For text content, show producing_report phase
271
381
  progress.phase = "producing_report";
272
382
  const message = event.message as any;
273
383
  if (message?.role === "assistant") {
@@ -300,6 +410,8 @@ export async function runGoalCompletionAuditor(args: {
300
410
  args.signal?.addEventListener("abort", abortSession, { once: true });
301
411
 
302
412
  // Emit initial progress
413
+ progress.label = "Starting audit...";
414
+ progress.percentage = 0;
303
415
  emitProgress();
304
416
  try {
305
417
  if (args.signal?.aborted) return { approved: false, disapproved: true, output: "", model: modelLabel(model), thinkingLevel, error: "Auditor aborted." };
@@ -307,6 +419,8 @@ export async function runGoalCompletionAuditor(args: {
307
419
  } finally {
308
420
  args.signal?.removeEventListener("abort", abortSession);
309
421
  progress.phase = "done";
422
+ progress.label = "Audit complete.";
423
+ progress.percentage = 100;
310
424
  emitProgress();
311
425
  unsubscribe();
312
426
  }
@@ -441,6 +441,8 @@ export default function goalExtension(pi: ExtensionAPI): void {
441
441
  active.add(QUESTIONNAIRE_TOOL_NAME);
442
442
  } else if (state.goal?.status === "active") {
443
443
  for (const name of goalExecutionWorkTools) active.add(name);
444
+ active.add(QUESTION_TOOL_NAME);
445
+ active.add(QUESTIONNAIRE_TOOL_NAME);
444
446
  }
445
447
  pi.setActiveTools(Array.from(active));
446
448
  } catch {}
@@ -1706,12 +1708,19 @@ export default function goalExtension(pi: ExtensionAPI): void {
1706
1708
  "Do not use update_goal=complete as an escape hatch when you are blocked. If you are blocked, call pause_goal({reason, suggestedAction?}) instead so the user can intervene.",
1707
1709
  "For sisyphus goals, do not mark complete until every numbered step has been executed and individually verified against its done criterion.",
1708
1710
  "If the user gives requirements, feedback, or corrections that differ from the goal objective, the goal is stale. Use update_goal with updatedObjective to sync the objective before continuing work or before marking the goal complete. This ensures the auditor evaluates against the latest requirements.",
1711
+ "If you have just run the test suite successfully and the tests all pass, include a testResults object with the exit code (0) and relevant output. The auditor will see this evidence and can skip re-running the tests.",
1709
1712
  ],
1710
1713
  parameters: Type.Object({
1711
1714
  status: Type.Optional(StringEnum([COMPLETE_STATUS] as const, { description: "Set to complete only when the objective is achieved." })),
1712
1715
  completionSummary: Type.Optional(Type.String({ description: "Concise completion claim and evidence summary passed to the independent auditor agent." })),
1713
1716
  confirmBypassAuditor: Type.Optional(Type.Boolean({ description: "Set to true to confirm bypassing the independent auditor when it is disabled in settings." })),
1714
1717
  updatedObjective: Type.Optional(Type.String({ description: "Revised goal objective. Use when the user's requirements have changed mid-flight. The goal remains active so the agent can continue working toward the new objective. Can be combined with status=complete to update the objective before the completion audit." })),
1718
+ testResults: Type.Optional(Type.Object({
1719
+ exitCode: Type.Number({ description: "Exit code of the test run (0 = success)" }),
1720
+ suiteName: Type.Optional(Type.String({ description: "Test suite name, e.g. 'npm test'" })),
1721
+ output: Type.Optional(Type.String({ description: "Last lines of test output showing results" })),
1722
+ timestamp: Type.Optional(Type.String({ description: "ISO timestamp of when tests were run" })),
1723
+ }, { description: "Structured test evidence passed to the auditor so it can skip redundant test re-runs. If you have just run the test suite successfully, include this so the auditor accepts the results without re-running." })),
1715
1724
  }),
1716
1725
  executionMode: "sequential",
1717
1726
  async execute(_toolCallId, params, signal, _onUpdate, ctx) {
@@ -1913,6 +1922,7 @@ export default function goalExtension(pi: ExtensionAPI): void {
1913
1922
  goal: auditTarget,
1914
1923
  completionSummary: params.completionSummary,
1915
1924
  detailedSummary: detailedSummary(auditTarget),
1925
+ testResults: params.testResults,
1916
1926
  signal: auditAbortController.signal,
1917
1927
  onProgress: (progress) => {
1918
1928
  auditProgress = {
@@ -36,6 +36,8 @@ ${untrustedObjectiveBlock(goal)}
36
36
 
37
37
  Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.
38
38
 
39
+ To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.
40
+
39
41
  Keep this goal in force until it is actually achieved. Do not pause for confirmation just because a phase, chapter, file, or checklist item is finished. At each natural stopping point, compare every explicit requirement with concrete evidence from the workspace/session. If the objective is complete, call update_goal with status=complete and summarize the evidence; update_goal will launch an independent pi auditor agent and only archive if that auditor returns <approved/>. If it is not complete, choose the next concrete action and do it.
40
42
 
41
43
  The completion auditor is independent and semantic, not a paperwork checklist. It may inspect files and command output, and it will reject scaffold-only, alpha, template, proxy-metric, or weakly verified completions with <disapproved/>.
@@ -62,6 +64,8 @@ export function continuationPrompt(goal: GoalRecord): string {
62
64
  "",
63
65
  "Available work tools for pursuing the active goal include write, read, bash, and edit. Use those tools directly for file and shell work; do not call get_goal repeatedly to discover tools.",
64
66
  "",
67
+ "To ask the user a structured question (e.g. when the user's spec changes and you need to clarify before updating the goal), use goal_question. It opens a question dialog and returns the user's answer as tool output. Use plain conversation for simple clarifications.",
68
+ "",
65
69
  "Avoid repeating work that is already done. Choose the next concrete action toward the objective.",
66
70
  "",
67
71
  "Before deciding that the goal is achieved, perform a completion audit against the actual current state:",
@@ -24,8 +24,12 @@ export interface AuditorWidgetProgress {
24
24
  currentToolArgs?: string;
25
25
  currentToolStartedAt?: number;
26
26
  recentOutput: string[];
27
- phase: "running" | "tool_executing" | "producing_report" | "done";
27
+ phase: "running" | "tool_executing" | "producing_report" | "thinking" | "done";
28
28
  elapsedMs: number;
29
+ /** Current step label shown to the user */
30
+ label?: string;
31
+ /** Completion percentage from 0 to 100 */
32
+ percentage?: number;
29
33
  }
30
34
 
31
35
  export interface GoalWidgetOptions {
@@ -52,6 +56,13 @@ function branchLine(theme: Theme, width: number, isLast: boolean, content: strin
52
56
  return fit(`${theme.fg("dim", prefix)} ${content}`, width);
53
57
  }
54
58
 
59
+ function progressBar(pct: number, barWidth: number, theme: Theme): string {
60
+ const safeBar = Math.max(3, barWidth);
61
+ const filled = Math.min(safeBar, Math.max(0, Math.round((pct / 100) * safeBar)));
62
+ const empty = safeBar - filled;
63
+ return `[${theme.fg("accent", "█".repeat(filled))}${theme.fg("dim", "░".repeat(empty))}]`;
64
+ }
65
+
55
66
  function displayIcon(goal: GoalWidgetRecord): { icon: string; color: GoalWidgetColor; label: string } {
56
67
  if (goal.status === "complete") return { icon: "✓", color: "success", label: "complete" };
57
68
  if (goal.status === "paused") {
@@ -81,8 +92,17 @@ function spinnerFrame(): string {
81
92
  export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme: Theme, width: number): string[] {
82
93
  const safeWidth = Math.max(1, width);
83
94
  const isActive = progress.phase !== "done";
84
- const icon = isActive ? theme.fg("accent", spinnerFrame()) : theme.fg("success", "✓");
85
- const label = isActive ? "auditing" : "audit complete";
95
+ const isThinking = progress.phase === "thinking";
96
+ const icon = isActive
97
+ ? isThinking
98
+ ? theme.fg("muted", "⟡")
99
+ : theme.fg("accent", spinnerFrame())
100
+ : theme.fg("success", "✓");
101
+ const label = isActive
102
+ ? isThinking
103
+ ? "thinking..."
104
+ : "auditing"
105
+ : "audit complete";
86
106
  // formatDuration expects seconds, progress.elapsedMs is in milliseconds
87
107
  const duration = formatDuration(Math.floor(progress.elapsedMs / 1000));
88
108
  const lines: string[] = [
@@ -94,7 +114,30 @@ export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme:
94
114
  ),
95
115
  ];
96
116
 
97
- if (isActive && progress.currentTool) {
117
+ // Show step label when available
118
+ if (progress.label) {
119
+ lines.push(branchLine(
120
+ theme,
121
+ safeWidth,
122
+ false,
123
+ `${theme.fg("text", truncateText(progress.label, Math.max(8, safeWidth - 6)))}`,
124
+ ));
125
+ }
126
+
127
+ // Show progress bar when percentage is available
128
+ if (typeof progress.percentage === "number") {
129
+ const barWidth = Math.max(6, Math.min(safeWidth - 10, 30));
130
+ const bar = progressBar(progress.percentage, barWidth, theme);
131
+ const pct = `${theme.fg("muted", `${Math.round(progress.percentage)}%`)}`;
132
+ lines.push(branchLine(
133
+ theme,
134
+ safeWidth,
135
+ isActive && !progress.currentTool && progress.recentOutput.length === 0 && !isThinking,
136
+ `${bar} ${pct}`,
137
+ ));
138
+ }
139
+
140
+ if (isActive && !isThinking && progress.currentTool) {
98
141
  const argText = progress.currentToolArgs
99
142
  ? truncateText(progress.currentToolArgs, Math.max(10, safeWidth - 24))
100
143
  : "";
@@ -129,7 +172,7 @@ export function renderAuditorWidgetLines(progress: AuditorWidgetProgress, theme:
129
172
  }
130
173
 
131
174
  // Show skip hint when audit is actively running
132
- if (isActive) {
175
+ if (isActive && !isThinking) {
133
176
  lines.push(branchLine(
134
177
  theme,
135
178
  safeWidth,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-goal-x",
3
- "version": "0.9.0",
3
+ "version": "0.10.1",
4
4
  "description": "Goal mode extension for pi: persistent long-running objectives, /goal-set drafting, Sisyphus prompt style, autoContinue, and an above-editor status overlay. Fork of @capyup/pi-goal.",
5
5
  "license": "MIT",
6
6
  "author": "pi-goal-x contributors",