pi-goal-x 0.16.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,6 +31,10 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
31
31
  - **Recursive subtasks** — tasks can have nested sub-tasks via `subtasks?: GoalTask[]` (full recursive type). Subtask depth is controlled globally by `subtaskDepth` in `.pi/pi-goal-x-settings.json` (default: 1 level). Too-deep subtrees are rejected at proposal.
32
32
  - **Lightweight subtasks** — each task has an optional `lightweightSubtasks?: boolean` flag. When true, the parent can complete regardless of subtask status. When false/absent (full subtasks), all subtasks must be individually complete before the parent can close.
33
33
  - **Per-task completion** — `complete_task` marks individual tasks done with optional evidence/verificationSummary, and `skip_task` marks tasks as skipped with a required reason. Neither stops the turn, so the agent can continue uninterrupted.
34
+ - **Recursive lookup** — `findTaskInTree` and `updateTaskInTree` search and update tasks at any depth. Subtask IDs are valid targets for `complete_task` and `skip_task`.
35
+ - **Subtask gate** — parent tasks with full subtasks require all sub-items to be completed or skipped before the parent can close, enforced by recursive `checkSubtasksComplete`.
36
+ - **Duplicate ID validation** — `validateTaskListProposal` recursively checks all task IDs across the entire tree, preventing collisions between parent/subtask or sibling subtasks.
37
+ - **Agent workflow guidance** — prompts include a `[TASK WORKFLOW]` section directing agents to use tasks as progress trackers, completing subtasks immediately when work finishes (not batch-marking at the end).
34
38
  - **Hierarchical display** — task lists with subtasks render with indentation in prompts (`taskListBlock`, `goalPrompt`, `continuationPrompt`) and in the TUI widget (recursive count, BFS next-pending).
35
39
  - **Optional `taskList`** — goals without a task list work exactly as before. The feature is entirely opt-in.
36
40
  - **Soft `complete_goal` gate** — when `blockCompletion: true` is set, `complete_goal` surfaces a warning if pending tasks remain (prompt-level only; the agent can still complete).
@@ -48,19 +52,20 @@ All core features of [@capyup/pi-goal](https://github.com/capyup/pi-goal) are pr
48
52
  ### E2e test infrastructure
49
53
 
50
54
  - **Deterministic fork tests using `--mode json`**: the e2e suite spawns a real `pi --fork --mode json` session, parses structured `tool_execution_start`/`tool_execution_end` JSON events for field-level assertions — no free-text AI output parsing. Uses `--append-system-prompt` + `--tools` to force deterministic tool calls.
51
- - **Full coverage**: 281 tests total — function-level integration tests, mock-pi handler tests, file-validity checks, real `pi --fork --mode json` E2E tests, propose_goal_tweak unit/integration/e2e tests, task list policy/round-trip/render tests (including subtasks), and verification contract tests.
55
+ - **Full coverage**: 310 tests total — function-level integration tests, mock-pi handler tests, file-validity checks, real `pi --fork --mode json` E2E tests, propose_goal_tweak unit/integration/e2e tests, task list policy/round-trip/render tests (including subtasks), and verification contract tests.
52
56
 
53
57
  ### Completion auditor
54
58
 
55
59
  - **Live progress widget** — when the auditor runs, the TUI shows a spinner, a progress bar (`[████░░░░] 40%`), step labels (`Inspecting files...`, `Verifying success criteria...`), the current tool being executed, and recent output lines. No more wondering if anything is happening.
60
+ - **Per-goal auditor toggle** — during goal confirmation, press `a` to toggle the auditor on/off for that goal. The toggle uses a ●/○ indicator between the goal summary and confirm options. The default position comes from settings; the per-goal override persists within the session.
56
61
  - **Escape to skip** — press Escape during an audit to abort it and complete the goal immediately. The skip is recorded in the ledger as `audit_skipped` with reason `user_aborted` and auditor model metadata.
57
- - **Disable the auditor entirely** — set `disabled: true` in `.pi/pi-goal-x-settings.json` (or toggle it via `/goal-settings`). The agent can still bypass with user confirmation by passing `confirmBypassAuditor: true` to `update_goal`.
62
+ - **Disable the auditor entirely** — set `disabled: true` in `.pi/pi-goal-x-settings.json` (or toggle it via `/goal-settings`). The agent can still bypass with user confirmation by passing `confirmBypassAuditor: true` to `complete_goal`.
58
63
  - **Skipped audits are recorded** — every skip (whether disabled or Escape-aborted) is logged to the ledger with the reason, provider, model, and thinking level for full traceability.
59
64
  - **Robust abort detection** — the auditor detects aborts both from exceptions *and* from `session.prompt()` returning after an abort signal, preventing stuck goals or ghost states.
60
65
  - **Cleaner lifecycle** — `AbortSignal` is properly wired to `session.abort()`, animation timers are cleaned up, and the unsubscribe path is always executed. No more having to kill the session.
61
66
  - **Completion report includes full auditor output** — the auditor's full report is included in the goal completion conversation message upon approval, not just a verdict.
62
67
  - **Session factory injection** — `runGoalCompletionAuditor` accepts an optional `createSession` parameter for testability, enabling mock auditor sessions in tests.
63
- - **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `update_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
68
+ - **Structured test evidence** — the executor can pass `testResults` (exit code, suite name, output, timestamp) via `complete_goal({testResults})`. The auditor receives a `<test_evidence>` block and is instructed to check it before re-running test suites, skipping redundant re-runs.
64
69
 
65
70
  ### Drafting & UX
66
71
 
@@ -232,7 +237,7 @@ The completion result prints a full report into the conversation:
232
237
  - the auditor's approval report
233
238
  - full current goal details, including objective, status, usage, mode, and file path
234
239
 
235
- Sisyphus goals use the same completion tool as regular goals. The stricter part is the prompt/criteria standard: the agent should only call completion after the whole ordered objective is actually satisfied and likely to survive independent auditing. A paused goal can also be completed directly when the agent already has enough evidence that every requirement is satisfied; it does not need a resume just to call `update_goal`.
240
+ Sisyphus goals use the same completion tool as regular goals. The stricter part is the prompt/criteria standard: the agent should only call completion after the whole ordered objective is actually satisfied and likely to survive independent auditing. A paused goal can also be completed directly when the agent already has enough evidence that every requirement is satisfied; it does not need a resume just to call `complete_goal`.
236
241
 
237
242
  ## Schema gates
238
243
 
@@ -0,0 +1,160 @@
1
+ # Changelog
2
+
3
+ ## 0.18.0 (2026-05-29)
4
+
5
+ ### Features
6
+
7
+ - **Enriched confirmation dialog** — proposal dialogs now render with full-width box-drawing section headers (`┌─ Section Name ─────┐`), per-status task coloring (`[x]` green, `[ ]` yellow), and goal structure section highlighting (`Objective:`, `Success criteria:`, etc. in accent). The 12-line MAX_CONTEXT_LINES cap is removed — full proposals are always visible.
8
+ - **Hidden TUI debug mode** — Ctrl+Shift+X toggles a debug panel in the goal widget. Ctrl+Shift+N creates/removes test goals (written to `.pi/goals/debug/`), Ctrl+Shift+T injects sample tasks, Ctrl+Shift+R starts a mock audit, Ctrl+Shift+O opens the proposal dialog with realistic data.
9
+
10
+ ### Fixes
11
+
12
+ - **Text wrapping inside boxes** — pipe-prefixed lines (`│ content`) that wrap now maintain the `│ ` prefix on continuation lines, keeping wrapped text inside the ASCII box. Task checkbox lines embedded in objective text also get the `│ ` prefix so they appear within the box.
13
+
14
+ ### Tests
15
+
16
+ - 310 total tests (unchanged).
17
+
18
+ ## 0.17.0 (2026-05-29)
19
+
20
+ ### Features
21
+
22
+ - **Per-goal auditor toggle** — press `a` during the confirmation dialog to toggle the auditor on/off for a specific goal. Default from settings; override persists within session.
23
+ - **Task workflow prompt guidance** — added `[TASK WORKFLOW]` section to both `goalPrompt` and `continuationPrompt`, directing agents to complete subtasks one-by-one as progress trackers (not batch-marking at the end).
24
+ - **Recursive duplicate ID validation** — `validateTaskListProposal` now checks all task IDs across the entire tree, preventing collisions between parent/subtask or sibling subtask IDs.
25
+ - **Escape dialog during audit** — pressing Escape during a completion audit shows a TUI dialog with "Mark complete without audit" or "Continue working" options.
26
+
27
+ ### Fixes
28
+
29
+ - `validateTaskCompletion` and `validateTaskSkip` now use recursive `findTaskInTree` instead of flat `Array.find()` for nested subtask support.
30
+ - Updated README references from legacy `update_goal` to `complete_goal`.
31
+
32
+ ### Tests
33
+
34
+ - 310 total tests (up from 308).
35
+ - Added tests for recursive duplicate ID detection across nested subtask trees.
36
+ - Added e2e test for `skipAuditor=true` path.
37
+
38
+ ## 0.16.1 (2026-05-29)
39
+
40
+ ### Features
41
+
42
+ - **Escape-to-skip audit** — press Escape during an auditor run to abort it and complete the goal immediately. The skip is recorded in the ledger with the reason `user_aborted` and auditor model metadata.
43
+ - **Audit progress widget** — the TUI shows a spinner, progress bar, step labels, current tool, and output lines while the auditor runs.
44
+ - **Audit abort detection** — the auditor detects aborts both from exceptions and from `session.prompt()` returning after an abort signal, preventing stuck goals or ghost states.
45
+ - **Goal status for Sisyphus** — added a `COMPLETED` status label for completed Sisyphus goals.
46
+ - **Multi-session focus isolation** — goal focus data uses `goalFocusDetails`, which includes the goal id and reason but not full balance data, preventing cross-session focus leakage.
47
+
48
+ ### Fixes
49
+
50
+ - Fixed a merge bug where `propose_task_list` could produce a duplicate task list when called during a continuation.
51
+
52
+ ## 0.16.0 (2026-05-29)
53
+
54
+ ### Features
55
+
56
+ - **`delete_goal` tool** — new lifecycle tool for archiving goals by id. Accepts a required `goalId` and optional `reason`. Agent-facing only; not intended for user use.
57
+ - **`complete_goal` `status` optional** — the `status` parameter on `complete_goal` is now optional. When omitted, defaults to `"complete"`. Explicitly setting an invalid value (anything other than `"complete"`) still produces an error.
58
+ - **SCROLL FIX** — the confirmation dialog no longer scrolls to the bottom when the user is scrolled up and new content arrives. Uses `addContextWrapped()` which suppresses viewport resets.
59
+ - **Task list shown first** — the task list section now appears FIRST in the confirmation dialog context (before the objective), with context capped at 12 lines so tasks don't scroll off-screen.
60
+ - **Audit completion flow** — the completion report card no longer says "Goal audit approved." when the auditor was skipped (now shows "Goal audit skipped." with reason).
61
+
62
+ ### Fixes
63
+
64
+ - Fixed task completion/skip validation for nested subtasks (uses recursive `findTaskInTree`).
65
+ - All `complete_goal` calls default to `status: "complete"` when no explicit status is provided.
66
+ - Updated prompts and tool descriptions to reflect the `complete_goal` naming.
67
+
68
+ ### Tests
69
+
70
+ - Updated e2e tests to verify `complete_goal` accepts calls without status.
71
+ - Added e2e test verifying `complete_goal` rejects invalid explicit status.
72
+
73
+ ## 0.15.1 (2026-05-28)
74
+
75
+ ### Fixes
76
+
77
+ - Fixed settings file reference in storage writes.
78
+
79
+ ### Documentation
80
+
81
+ - Reorganized README settings documentation for clarity.
82
+
83
+ ## 0.14.0 (2026-05-27)
84
+
85
+ ### Features
86
+
87
+ - **Subtask hierarchy** — tasks can have nested sub-tasks via `subtasks?: GoalTask[]`. Subtask depth controlled by `subtaskDepth` setting (default: 1). Deep subtrees are rejected at proposal.
88
+ - **Lightweight subtasks** — `lightweightSubtasks?: boolean` on tasks. When true, parent can complete regardless of subtask status. Full subtasks require all sub-items completed first.
89
+ - **Per-task contracts** — `propose_task_list` supports optional `verificationContract` per task. If set, `complete_task` requires a non-empty `verificationSummary`.
90
+ - **Task list block** — tasks are listed in prompts with checkboxes and status indicators.
91
+
92
+ ### Tests
93
+
94
+ - Added e2e tests for goal creation with task list, scroll fix, and subtask validation.
95
+
96
+ ## 0.13.0 (2026-05-22)
97
+
98
+ ### Features
99
+
100
+ - **Verification contract system** — goals can include a `Verification contract:` section. Extracted and stored on the goal record. `complete_goal` rejects calls without `verificationSummary` when a contract is set.
101
+ - **Per-goal verification contracts** — the contract is extracted during goal drafting and enforced by tools and prompts.
102
+ - **`complete_goal` `testResults` removed** — replaced with `verificationSummary`. The old structured test results interface is gone.
103
+ - **Auditor integration** — the independent completion auditor receives both the `verificationContract` and `verificationSummary` and cross-checks claims against real artifacts.
104
+
105
+ ### Tests
106
+
107
+ - Updated verification contract tests.
108
+
109
+ ## 0.12.0 (2026-04-29)
110
+
111
+ ### Features
112
+
113
+ - **Task list system** — `propose_task_list` tool with confirmation dialog. Tasks stored on goal record, rendered in prompts and widget, serialized to disk.
114
+ - **Unified goal + task acceptance** — `propose_goal_draft` accepts optional `tasks` array. Single dialog shows goal + task list together.
115
+ - **`complete_task` and `skip_task` tools** — per-task completion with evidence/verificationSummary. Neither stops the turn.
116
+ - **`update_goal` renamed to `complete_goal`** — the core completion tool is now called as `complete_goal({status: "complete"})` and requires the `status` to be passed explicitly.
117
+ - **Completion report heading fix** — the report now shows `Goal complete.` instead of `Goal audit approved.` when no contract or auditor is involved.
118
+
119
+ ### Tests
120
+
121
+ - Full task lifecycle tests (policy, round-trip, render, edge cases).
122
+ - Verification contract tests for both goal-level and per-task contracts.
123
+
124
+ ## 0.11.0 (2026-04-23)
125
+
126
+ ### Features
127
+
128
+ - **Deferred archival** — goals are archived at `turn_end`, not inline in the tool handler. Prevents premature archiving before the agent sees the audit result.
129
+ - **`propose_goal_tweak`** — sole mechanism for updating the goal objective during `/goal-tweak`. Uses the same Confirm/Continue Chatting dialog as goal creation.
130
+ - **Focus isolation** — goal focus is stored as a branch-local session entry, not in goal markdown metadata. Multiple sessions can have different focused goals.
131
+ - **Auditor bypass with user confirmation** — `confirmBypassAuditor: true` bypasses the auditor when the user explicitly opts out.
132
+
133
+ ### Fixes
134
+
135
+ - Cleaned up lifecycle issues with AbortSignal wiring and timer cleanup.
136
+
137
+ ## 0.10.0 (2026-04-15)
138
+
139
+ ### Features
140
+
141
+ - **Completion audit system** — independent pi auditor agent verifies completion claims before archiving.
142
+ - **Audit progress** — real-time TUI progress widget with spinner, progress bar, and step labels.
143
+ - **Ledger system** — structured event log for all goal lifecycle events.
144
+
145
+ ## 0.9.0 (2026-04-08)
146
+
147
+ ### Features
148
+
149
+ - **`goal_question` and `goal_questionnaire`** — structured drafting question tools.
150
+ - **`/goal-settings`** — interactive settings configuration.
151
+ - **Sisyphus goal style** — patient ordered execution with prompt/criteria variant.
152
+
153
+ ## 0.8.1 (2026-04-01)
154
+
155
+ ### Features
156
+
157
+ - Initial fork from @capyup/pi-goal.
158
+ - Pause/resume/abort lifecycle.
159
+ - Multiple open goals.
160
+ - Auto-continue loop.
@@ -85,19 +85,29 @@ export function buildDraftConfirmationText(args: {
85
85
  }): string {
86
86
  const lines: string[] = [];
87
87
  const modeLabel = args.focus === "sisyphus" ? "Sisyphus (prompt/criteria style)" : "Normal goal";
88
- lines.push("Goal draft ready for confirmation.");
88
+ lines.push("Goal draft ready for confirmation.");
89
89
  lines.push("");
90
- lines.push("Draft details:");
91
- lines.push(`Mode: ${modeLabel}`);
92
- lines.push(`Auto-continue: ${args.autoContinue ? "yes" : "no"}`);
90
+ lines.push("─── Draft Details ───");
91
+ lines.push(`│ Mode: ${modeLabel}`);
92
+ lines.push(`│ Auto-continue: ${args.autoContinue ? "yes" : "no"}`);
93
93
  lines.push("");
94
- lines.push("Original topic:");
94
+ lines.push("─── Original Topic ───");
95
95
  lines.push("");
96
- lines.push(args.originalTopic.trim());
96
+ for (const topicLine of args.originalTopic.trim().split("\n")) {
97
+ if (topicLine.trim()) lines.push(`│ ${topicLine}`);
98
+ }
97
99
  lines.push("");
98
- lines.push("Proposed goal:");
100
+ lines.push("─── Proposed Goal ───");
99
101
  lines.push("");
100
- lines.push(args.objective);
102
+ for (const objLine of args.objective.split("\n")) {
103
+ const trimmed = objLine.trim();
104
+ if (!trimmed) continue;
105
+ if (trimmed.startsWith("│")) {
106
+ lines.push(objLine);
107
+ } else {
108
+ lines.push(`│ ${objLine}`);
109
+ }
110
+ }
101
111
  return lines.join("\n");
102
112
  }
103
113
 
@@ -109,22 +119,40 @@ export function buildTweakConfirmationText(args: {
109
119
  }): string {
110
120
  const lines: string[] = [];
111
121
  const modeLabel = args.sisyphus ? "Sisyphus (prompt/criteria style)" : "Normal goal";
112
- lines.push("Goal tweak ready for confirmation.");
122
+ lines.push("Goal tweak ready for confirmation.");
113
123
  lines.push("");
114
- lines.push("Draft details:");
115
- lines.push(`Mode: ${modeLabel}`);
124
+ lines.push("─── Draft Details ───");
125
+ lines.push(`│ Mode: ${modeLabel}`);
116
126
  lines.push("");
117
- lines.push("Change:");
127
+ lines.push("─── Change ───");
118
128
  lines.push("");
119
- lines.push(args.changeSummary);
129
+ for (const changeLine of args.changeSummary.split("\n")) {
130
+ if (changeLine.trim()) lines.push(`│ ${changeLine}`);
131
+ }
120
132
  lines.push("");
121
- lines.push("Current objective:");
133
+ lines.push("─── Current Objective ───");
122
134
  lines.push("");
123
- lines.push(args.currentObjective);
135
+ for (const curLine of args.currentObjective.split("\n")) {
136
+ const trimmed = curLine.trim();
137
+ if (!trimmed) continue;
138
+ if (trimmed.startsWith("│")) {
139
+ lines.push(curLine);
140
+ } else {
141
+ lines.push(`│ ${curLine}`);
142
+ }
143
+ }
124
144
  lines.push("");
125
- lines.push("Proposed new objective:");
145
+ lines.push("─── Proposed New Objective ───");
126
146
  lines.push("");
127
- lines.push(args.newObjective);
147
+ for (const newLine of args.newObjective.split("\n")) {
148
+ const trimmed = newLine.trim();
149
+ if (!trimmed) continue;
150
+ if (trimmed.startsWith("│")) {
151
+ lines.push(newLine);
152
+ } else {
153
+ lines.push(`│ ${newLine}`);
154
+ }
155
+ }
128
156
  return lines.join("\n");
129
157
  }
130
158
 
@@ -185,7 +185,7 @@ export function validateTaskCompletion(args: {
185
185
  }): PolicyValidation {
186
186
  if (!args.goal) return { ok: false, message: "No goal is set." };
187
187
  if (!args.goal.taskList) return { ok: false, message: "Goal has no task list." };
188
- const task = args.goal.taskList.tasks.find((t) => t.id === args.taskId);
188
+ const task = findTaskInTree(args.goal.taskList.tasks, args.taskId);
189
189
  if (!task) return { ok: false, message: `Task "${args.taskId}" not found.` };
190
190
  if (task.status === "complete") return { ok: false, message: `Task "${args.taskId}" is already complete.` };
191
191
  if (task.status === "skipped") return { ok: false, message: `Task "${args.taskId}" was already skipped.` };
@@ -199,7 +199,7 @@ export function validateTaskSkip(args: {
199
199
  }): PolicyValidation {
200
200
  if (!args.goal) return { ok: false, message: "No goal is set." };
201
201
  if (!args.goal.taskList) return { ok: false, message: "Goal has no task list." };
202
- const task = args.goal.taskList.tasks.find((t) => t.id === args.taskId);
202
+ const task = findTaskInTree(args.goal.taskList.tasks, args.taskId);
203
203
  if (!task) return { ok: false, message: `Task "${args.taskId}" not found.` };
204
204
  if (task.status === "complete") return { ok: false, message: `Task "${args.taskId}" is already complete.` };
205
205
  // Skipped tasks toggle via the executor; reason is only required for first-time skips.
@@ -241,6 +241,20 @@ export function findSubtaskDepthViolation(tasks: GoalTask[], maxDepth: number):
241
241
  return undefined;
242
242
  }
243
243
 
244
+ function checkDuplicateTaskIds(tasks: GoalTask[], ids: Set<string>): string | undefined {
245
+ for (const t of tasks) {
246
+ const id = t.id.trim();
247
+ if (!id) return "All tasks must have a non-empty id.";
248
+ if (ids.has(id)) return `Duplicate task id: "${id}".`;
249
+ ids.add(id);
250
+ if (t.subtasks) {
251
+ const childErr = checkDuplicateTaskIds(t.subtasks, ids);
252
+ if (childErr) return childErr;
253
+ }
254
+ }
255
+ return undefined;
256
+ }
257
+
244
258
  export function validateTaskListProposal(args: {
245
259
  goal: GoalPolicyRecordLike | null;
246
260
  tasks: GoalTask[];
@@ -254,6 +268,11 @@ export function validateTaskListProposal(args: {
254
268
  if (!t.title.trim()) return { ok: false, message: `Task "${t.id}" must have a non-empty title.` };
255
269
  if (ids.has(t.id)) return { ok: false, message: `Duplicate task id: "${t.id}".` };
256
270
  ids.add(t.id);
271
+ // Recursively check subtask ids against the same global set
272
+ if (t.subtasks && t.subtasks.length > 0) {
273
+ const childErr = checkDuplicateTaskIds(t.subtasks, ids);
274
+ if (childErr) return { ok: false, message: childErr };
275
+ }
257
276
  }
258
277
  // Check subtask depth limit
259
278
  const maxDepth = args.maxSubtaskDepth ?? 1;
@@ -26,6 +26,7 @@ export interface GoalQuestionnaireResult {
26
26
  questions: GoalQuestionnaireQuestion[];
27
27
  answers: GoalQuestionnaireAnswer[];
28
28
  cancelled: boolean;
29
+ auditorEnabled?: boolean;
29
30
  }
30
31
 
31
32
  export type ProposalDecision = "confirm" | "continue";
@@ -82,7 +83,7 @@ export function proposalDialogFailureMessage(error: unknown): string {
82
83
  * the internal draft-confirm prompt. This keeps pi-goal self-contained and
83
84
  * avoids depending on external question/questionnaire packages.
84
85
  */
85
- export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions: GoalQuestionnaireQuestion[]): Promise<GoalQuestionnaireResult> {
86
+ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions: GoalQuestionnaireQuestion[], auditorToggleInit?: { defaultEnabled: boolean }): Promise<GoalQuestionnaireResult> {
86
87
  if (!ctx.hasUI) {
87
88
  return { questions: [], answers: [], cancelled: true };
88
89
  }
@@ -102,6 +103,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
102
103
  let inputMode = false;
103
104
  let inputQuestionId: string | null = null;
104
105
  let cachedLines: string[] | undefined;
106
+ let auditorEnabled = auditorToggleInit?.defaultEnabled ?? true;
105
107
  const answers = new Map<string, GoalQuestionnaireAnswer>();
106
108
  const drafts = new Map<string, string>();
107
109
 
@@ -126,7 +128,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
126
128
  // Restore hardware cursor now that the dialog is closing
127
129
  tui.setShowHardwareCursor(wasHardwareCursorShown);
128
130
  const ordered = questions.map((q) => answers.get(q.id)).filter((a): a is GoalQuestionnaireAnswer => !!a);
129
- done({ questions, answers: ordered, cancelled });
131
+ done({ questions, answers: ordered, cancelled, auditorEnabled: auditorToggleInit ? auditorEnabled : undefined });
130
132
  }
131
133
 
132
134
  function currentQuestion(): GoalQuestionnaireQuestion | undefined {
@@ -272,6 +274,13 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
272
274
  return;
273
275
  }
274
276
 
277
+ // Auditor toggle hotkey
278
+ if (matchesKey(data, "a") && auditorToggleInit) {
279
+ auditorEnabled = !auditorEnabled;
280
+ refresh();
281
+ return;
282
+ }
283
+
275
284
  if (matchesKey(data, Key.enter) && q) {
276
285
  if (q.options.length === 0 || opts[optionIndex]?.isCustom) {
277
286
  inputMode = true;
@@ -293,7 +302,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
293
302
  if (matchesKey(data, Key.escape)) submit(true);
294
303
  }
295
304
 
296
- function render(width: number): string[] {
305
+ function render(width: number): string[] {
297
306
  if (cachedLines) return cachedLines;
298
307
  const safeWidth = Math.max(20, width);
299
308
  const lines: string[] = [];
@@ -301,6 +310,111 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
301
310
  const opts = displayOptions();
302
311
  const add = (s: string) => lines.push(truncateToWidth(s, safeWidth, "…", true));
303
312
  const addWrapped = (s: string) => lines.push(...wrapTextWithAnsi(s, safeWidth));
313
+ /**
314
+ * Wraps a pipe-prefixed line and prepends "│ " to continuation lines
315
+ * so wrapped content stays within the ASCII box.
316
+ */
317
+ const addWrappedPipe = (styledLine: string) => {
318
+ const wrapped = wrapTextWithAnsi(styledLine, safeWidth);
319
+ for (let i = 0; i < wrapped.length; i++) {
320
+ lines.push(i === 0 ? wrapped[i] : "│ " + wrapped[i]);
321
+ }
322
+ };
323
+
324
+ /** Render context lines with per-line styling. No truncation. */
325
+ const renderContextLines = (context: string): void => {
326
+ const rawLines = context.split("\n");
327
+ for (const rawLine of rawLines) {
328
+ const trimmed = rawLine.trim();
329
+ // Empty line — preserve as spacing
330
+ if (!trimmed) {
331
+ lines.push("");
332
+ continue;
333
+ }
334
+
335
+ // 1. Announcement header — "● Goal draft/tweak ready for confirmation."
336
+ if (/^● Goal (draft|tweak) ready for confirmation\.$/.test(trimmed)) {
337
+ addWrapped(theme.fg("accent", rawLine));
338
+ continue;
339
+ }
340
+
341
+ // 2. Section marker — "─── Name ───" → full-width box-drawing header
342
+ const sectionMatch = trimmed.match(/^───\s+(.+?)\s+───$/);
343
+ if (sectionMatch) {
344
+ const sectionName = sectionMatch[1];
345
+ const namePart = ` ${sectionName} `;
346
+ const left = "┌─";
347
+ const right = "─┐";
348
+ const fill = Math.max(0, safeWidth - 2 - visibleWidth(left + namePart + right));
349
+ add(theme.fg("accent", left + namePart + "─".repeat(fill) + right));
350
+ continue;
351
+ }
352
+
353
+ // 3. Lines with │ prefix come from buildDraftConfirmationText / buildTweakConfirmationText.
354
+ if (trimmed.startsWith("│")) {
355
+ const afterPipe = trimmed.slice(1).trim();
356
+ // 3a. Task checkbox under │ prefix — detect before key-value to avoid
357
+ // "[x] t1: ..." being misinterpreted as a key-value pair.
358
+ const pipeTaskMatch = afterPipe.match(/^(\[.\])(\s+)(.+)$/);
359
+ if (pipeTaskMatch) {
360
+ const bracket = pipeTaskMatch[1];
361
+ const sep = pipeTaskMatch[2];
362
+ const rest = pipeTaskMatch[3];
363
+ // Preserve inner whitespace between │ and the task marker (e.g. " " in "│ [x]...")
364
+ const pipeContent = trimmed.slice(1);
365
+ const innerWs = pipeContent.slice(0, pipeContent.length - pipeContent.trimStart().length);
366
+ const linePrefix = "│" + innerWs;
367
+ const color = bracket === "[x]" ? "success" : "warning";
368
+ addWrappedPipe(linePrefix + theme.fg(color, bracket) + sep + theme.fg("muted", rest));
369
+ continue;
370
+ }
371
+ // 3b. Key-value content (e.g. "│ Mode: Normal goal", "│ Auto-continue: yes")
372
+ if (afterPipe.includes(": ")) {
373
+ const colonIdx = afterPipe.indexOf(": ");
374
+ const val = afterPipe.slice(colonIdx + 2).trim();
375
+ const keyPart = rawLine.slice(0, rawLine.indexOf(afterPipe) + colonIdx + 2);
376
+ if (val === "yes" || val === "no") {
377
+ addWrappedPipe(theme.fg("muted", keyPart) + theme.fg(val === "yes" ? "success" : "warning", val));
378
+ continue;
379
+ }
380
+ addWrappedPipe(theme.fg("muted", rawLine));
381
+ continue;
382
+ }
383
+ // 3c. Generic content under │ prefix (topic, goal text, etc.)
384
+ addWrappedPipe(theme.fg("muted", rawLine));
385
+ continue;
386
+ }
387
+
388
+ // 4. Goal objective structure lines — detected before task checkboxes
389
+ // because === Goal could overlap with ─── markers but we already checked those.
390
+ const GOAL_SECTION_RE = /^(=== (Goal|Sisyphus Goal) ===|Objective:|Success criteria:|Boundaries:|Constraints:|Verification contract:|If blocked:)/;
391
+ if (GOAL_SECTION_RE.test(trimmed)) {
392
+ addWrapped(theme.fg("accent", rawLine));
393
+ continue;
394
+ }
395
+
396
+ // 5. Actual box-drawing borders (┌ ├ └ ┐ ┤ ┘ ─) — NOT │, which is handled above
397
+ if (/^[┌├└┐┤┘─]/.test(trimmed)) {
398
+ addWrapped(theme.fg("dim", rawLine));
399
+ continue;
400
+ }
401
+
402
+ // 6. Task checkbox item — "[ ] ...", "[x] ...", or "[~] ..." (with optional indent)
403
+ const checkMatch = trimmed.match(/^(\[.\])(\s+)(.+)$/);
404
+ if (checkMatch) {
405
+ const bracket = checkMatch[1];
406
+ const sep = checkMatch[2];
407
+ const rest = checkMatch[3];
408
+ const indent = rawLine.slice(0, rawLine.length - trimmed.length);
409
+ const color = bracket === "[x]" ? "success" : "warning";
410
+ addWrapped(indent + theme.fg(color, bracket) + sep + theme.fg("muted", rest));
411
+ continue;
412
+ }
413
+
414
+ // 7. Default: any remaining content (fallback)
415
+ addWrapped(theme.fg("muted", rawLine));
416
+ }
417
+ };
304
418
 
305
419
  add(theme.fg("accent", "─".repeat(safeWidth)));
306
420
  if (isMulti) {
@@ -331,7 +445,7 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
331
445
 
332
446
  if (inputMode && q) {
333
447
  addWrapped(theme.fg("text", ` ${q.question}`));
334
- if (q.context) addWrapped(theme.fg("muted", ` ${q.context}`));
448
+ if (q.context) renderContextLines(q.context);
335
449
  lines.push("");
336
450
  if (q.options.length > 0) {
337
451
  renderOptions();
@@ -352,7 +466,15 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
352
466
  add(allAnswered() ? theme.fg("success", " Press Enter to submit") : theme.fg("warning", ` Unanswered: ${questions.filter((qq) => !answers.has(qq.id)).map((qq) => qq.id).join(", ")}`));
353
467
  } else if (q) {
354
468
  addWrapped(theme.fg("text", ` ${q.question}`));
355
- if (q.context) addWrapped(theme.fg("muted", ` ${q.context}`));
469
+ if (q.context) renderContextLines(q.context);
470
+ // Auditor toggle line between context and options
471
+ if (auditorToggleInit) {
472
+ const circle = auditorEnabled ? "●" : "○";
473
+ const label = auditorEnabled ? "Auditor enabled" : "Auditor disabled";
474
+ const color = auditorEnabled ? "success" : "warning";
475
+ add(theme.fg(color, ` ${circle} ${label}`) + theme.fg("dim", " (press 'a' to toggle)"));
476
+ lines.push("");
477
+ }
356
478
  const existing = answers.get(q.id);
357
479
  if (existing) add(theme.fg("dim", ` Current: ${existing.wasCustom ? "(wrote) " : ""}${existing.answer}`));
358
480
  lines.push("");
@@ -361,7 +483,10 @@ export async function runGoalQuestionnaire(ctx: ExtensionContext, rawQuestions:
361
483
  }
362
484
 
363
485
  lines.push("");
364
- if (!inputMode) add(theme.fg("dim", isMulti ? " Tab/←→ navigate • ↑↓ select • Enter confirm • Esc cancel" : " ↑↓ navigate • Enter select • Esc cancel"));
486
+ if (!inputMode) {
487
+ const auditorHint = auditorToggleInit ? " • a toggle auditor" : "";
488
+ add(theme.fg("dim", isMulti ? " Tab/←→ navigate • ↑↓ select • Enter confirm • Esc cancel" + auditorHint : " ↑↓ navigate • Enter select • Esc cancel" + auditorHint));
489
+ }
365
490
  add(theme.fg("accent", "─".repeat(safeWidth)));
366
491
  cachedLines = lines;
367
492
  return lines;
@@ -379,7 +504,8 @@ export async function showProposalDialog(
379
504
  ctx: ExtensionContext,
380
505
  confirmationText: string,
381
506
  focus: GoalDraftingFocus,
382
- ): Promise<ProposalDecision> {
507
+ defaultAuditorEnabled?: boolean,
508
+ ): Promise<{ decision: ProposalDecision; auditorEnabled: boolean }> {
383
509
  const headerTitle = focus === "sisyphus" ? "Confirm Sisyphus Goal Draft" : "Confirm Goal Draft";
384
510
  const result = await runGoalQuestionnaire(ctx, [{
385
511
  id: "confirm",
@@ -388,11 +514,12 @@ export async function showProposalDialog(
388
514
  options: ["Confirm — create this goal now", "Continue chatting — keep refining"],
389
515
  recommended: 0,
390
516
  allowCustom: false,
391
- }]);
392
- return proposalDecisionFromQuestionnaireResult({
517
+ }], defaultAuditorEnabled !== undefined ? { defaultEnabled: defaultAuditorEnabled } : undefined);
518
+ const decision = proposalDecisionFromQuestionnaireResult({
393
519
  cancelled: result.cancelled,
394
520
  answer: result.answers[0]?.answer,
395
521
  });
522
+ return { decision, auditorEnabled: result.auditorEnabled ?? true };
396
523
  }
397
524
 
398
525
  export function registerQuestionnaireTools(pi: ExtensionAPI): void {
@@ -45,6 +45,7 @@ export interface GoalRecord {
45
45
  // Set by the agent's pause_goal tool. Cleared when the goal becomes active again.
46
46
  pauseReason?: string;
47
47
  pauseSuggestedAction?: string;
48
+ skipAuditor?: boolean;
48
49
  taskList?: GoalTaskList;
49
50
  /** Plain-text description of what verification evidence is required before completing this goal. */
50
51
  verificationContract?: string;
@@ -247,6 +248,7 @@ export function normalizeGoalRecord(value: unknown): GoalRecord | null {
247
248
  stopReason: raw.stopReason === "agent" || raw.stopReason === "user" ? raw.stopReason : undefined,
248
249
  pauseReason: typeof raw.pauseReason === "string" && raw.pauseReason.trim() ? raw.pauseReason : undefined,
249
250
  pauseSuggestedAction: typeof raw.pauseSuggestedAction === "string" && raw.pauseSuggestedAction.trim() ? raw.pauseSuggestedAction : undefined,
251
+ skipAuditor: raw.skipAuditor === true ? true : undefined,
250
252
  taskList: normalizeTaskList(raw.taskList),
251
253
  verificationContract: typeof raw.verificationContract === "string" ? raw.verificationContract : undefined,
252
254
  };
@@ -143,6 +143,14 @@ export function loadGoalSettings(cwd: string, env: NodeJS.ProcessEnv = process.e
143
143
  * Save settings to the unified settings file on disk.
144
144
  * Persists only non-default values using the canonical key names.
145
145
  */
146
+ /**
147
+ * Report whether the auditor is on by default: returns true unless `settings.disabled === true` (an absent or false `disabled` counts as enabled).
148
+ * NOTE(review): the "Save settings to the unified settings file on disk" comment directly above this one documents saveGoalSettingsFileConfig below, not this function.
149
+ */
150
+ export function isAuditorEnabledByDefault(settings: GoalSettings): boolean {
151
+ return settings.disabled !== true;
152
+ }
153
+
146
154
  export function saveGoalSettingsFileConfig(cwd: string, settings: GoalSettings): GoalSettings {
147
155
  const clean: GoalSettings = {};
148
156
  const provider = asNonEmptyString(settings.provider);