npm - @wazir-dev/cli - Versions diffs - 1.2.0 → 1.3.0 - Mend

@wazir-dev/cli 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/CHANGELOG.md +39 -44
package/README.md +13 -13
package/assets/demo.cast +47 -0
package/assets/demo.gif +0 -0
package/docs/anti-patterns/AP-23-skipping-enabled-workflows.md +28 -0
package/docs/anti-patterns/AP-24-clarifier-deciding-scope.md +34 -0
package/docs/concepts/architecture.md +1 -1
package/docs/concepts/why-wazir.md +1 -1
package/docs/readmes/INDEX.md +1 -1
package/docs/readmes/features/expertise/README.md +1 -1
package/docs/readmes/features/hooks/pre-compact-summary.md +1 -1
package/docs/reference/hooks.md +1 -0
package/docs/reference/launch-checklist.md +3 -3
package/docs/reference/review-loop-pattern.md +3 -2
package/docs/reference/skill-tiers.md +2 -2
package/expertise/antipatterns/process/ai-coding-antipatterns.md +117 -0
package/exports/hosts/claude/.claude/commands/plan-review.md +3 -1
package/exports/hosts/claude/.claude/commands/verify.md +30 -1
package/exports/hosts/claude/export.manifest.json +2 -2
package/exports/hosts/codex/export.manifest.json +2 -2
package/exports/hosts/cursor/export.manifest.json +2 -2
package/exports/hosts/gemini/export.manifest.json +2 -2
package/llms-full.txt +48 -18
package/package.json +2 -3
package/schemas/phase-report.schema.json +9 -0
package/skills/brainstorming/SKILL.md +14 -2
package/skills/clarifier/SKILL.md +189 -35
package/skills/executor/SKILL.md +67 -0
package/skills/init-pipeline/SKILL.md +0 -1
package/skills/reviewer/SKILL.md +86 -13
package/skills/self-audit/SKILL.md +20 -0
package/skills/skill-research/SKILL.md +188 -0
package/skills/verification/SKILL.md +41 -3
package/skills/wazir/SKILL.md +304 -38
package/tooling/src/capture/command.js +17 -1
package/tooling/src/capture/store.js +32 -0
package/tooling/src/capture/user-input.js +66 -0
package/tooling/src/checks/security-sensitivity.js +69 -0
package/tooling/src/cli.js +28 -26
package/tooling/src/guards/phase-prerequisite-guard.js +58 -0
package/tooling/src/init/auto-detect.js +0 -2
package/tooling/src/init/command.js +3 -95
package/tooling/src/status/command.js +6 -1
package/tooling/src/verify/proof-collector.js +299 -0
package/workflows/plan-review.md +3 -1
package/workflows/verify.md +30 -1

package/skills/wazir/SKILL.md CHANGED Viewed

@@ -9,6 +9,16 @@ The user typed `/wazir <their request>`. Run the entire pipeline end-to-end, han
 All questions use **numbered interactive options** — one question at a time, defaults marked "(Recommended)", wait for user response before proceeding.
+## User Input Capture
+After every user response (approval, correction, rejection, redirect, instruction), capture it:
+```
+captureUserInput(runDir, { phase: '<current-phase>', type: '<instruction|approval|correction|rejection|redirect>', content: '<user message>', context: '<what prompted the question>' })
+```
+This uses `tooling/src/capture/user-input.js`. The log at `user-input-log.ndjson` feeds the learning system — user corrections are the strongest signal for improvement. At run end, call `pruneOldInputLogs(stateRoot, 10)` to delete the input logs of all but the 10 most recent runs.
 ## Command Routing
 Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
 - Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
@@ -82,6 +92,9 @@ Parse the request for inline modifiers before the main text:
 Recognized modifiers:
 - **Depth:** `quick`, `deep` (standard is default when omitted)
+- **Interaction mode:** `auto`, `interactive` (guided is default when omitted)
+  - `/wazir auto fix the auth bug` → interaction_mode = auto
+  - `/wazir interactive design the onboarding` → interaction_mode = interactive
 - **Intent:** `bugfix`, `feature`, `refactor`, `docs`, `spike`
 ## Step 2: Check Prerequisites
@@ -93,11 +106,14 @@ Run `which wazir` to check if the CLI is installed.
 **If not installed**, present:
 > **The Wazir CLI is not installed. It's required for event capture, validation, and indexing.**
->
-> **How would you like to install it?**
->
-> 1. **npm** (Recommended) — `npm install -g @wazir-dev/cli`
-> 2. **Local link** — `npm link` from the Wazir project root
+Ask the user via AskUserQuestion:
+- **Question:** "The Wazir CLI is not installed. How would you like to install it?"
+- **Options:**
+  1. "npm install -g @wazir-dev/cli" *(Recommended)*
+  2. "npm link from the Wazir project root"
+Wait for the user's selection before continuing.
 The CLI is **required** — the pipeline uses `wazir capture`, `wazir validate`, `wazir index`, and `wazir doctor` throughout execution.
@@ -109,9 +125,14 @@ Run `wazir validate branches` to check the current git branch.
 - If on `main` or `develop`:
   > You're on **[branch]**. The pipeline requires a feature branch.
-  >
-  > 1. **Create feat/<slug>** (Recommended) — branch from current
-  > 2. **Continue on [branch]** — not recommended for feature/refactor work
+  Ask the user via AskUserQuestion:
+  - **Question:** "You're on a protected branch. Create a feature branch?"
+  - **Options:**
+    1. "Create feat/<slug> from current branch" *(Recommended)*
+    2. "Continue on current branch — not recommended"
+  Wait for the user's selection before continuing.
 ### Index Check
@@ -154,9 +175,14 @@ Check if a previous incomplete run exists (via `latest` symlink pointing to a ru
 **If previous incomplete run found**, present:
 > **A previous incomplete run was detected:** `<previous-run-id>`
->
-> 1. **Resume** (Recommended) — continue from the last completed phase
-> 2. **Start fresh** — create a new empty run
+Ask the user via AskUserQuestion:
+- **Question:** "A previous incomplete run was detected. Resume or start fresh?"
+- **Options:**
+  1. "Resume from the last completed phase" *(Recommended)*
+  2. "Start fresh with a new empty run"
+Wait for the user's selection before continuing.
 **If Resume:**
 - Copy `clarified/` from previous run into new run, EXCEPT `user-feedback.md`.
@@ -196,8 +222,7 @@ parsed_intent: feature
 entry_point: "/wazir"
 depth: standard
-team_mode: sequential
-parallel_backend: none
+interaction_mode: guided  # auto | guided | interactive
 # Workflow policy — individual workflows within each phase
 workflow_policy:
@@ -247,18 +272,124 @@ After building run config:
   > **Running: standard depth, feature, sequential. Proceeding...**
 - **Low confidence** — show plan and ask:
-  > **Does this look right?**
-  > 1. **Yes, proceed** (Recommended)
-  > 2. **No, let me adjust**
+  Ask the user via AskUserQuestion:
+  - **Question:** "Does this run configuration look right?"
+  - **Options:**
+    1. "Yes, proceed" *(Recommended)*
+    2. "No, let me adjust"
+  Wait for the user's selection before continuing.
 ```bash
 wazir capture event --run <run-id> --event phase_exit --phase init --status completed
 ```
+Run the phase report and display it to the user:
+```bash
+wazir report phase --run <run-id> --phase init
+```
+Output the report content to the user in the conversation.
+---
+# Interaction Modes
+The `interaction_mode` field in run-config controls how the pipeline interacts with the user:
+| Mode | Inline modifier | Behavior | Best for |
+|------|----------------|----------|----------|
+| **`guided`** | (default) | Pipeline runs, pauses at phase checkpoints for user approval. Current default behavior. | Most work |
+| **`auto`** | `/wazir auto ...` | No human checkpoints. Codex reviews all. Gating agent decides continue/loop_back/escalate. Stops ONLY on escalate. | Overnight, clear spec, well-understood domain |
+| **`interactive`** | `/wazir interactive ...` | More questions, more discussion, co-designs with user. Researcher presents options. Executor checks approach before coding. | Ambiguous requirements, new domain, learning |
+## `auto` mode constraints
+- **Codex REQUIRED** — refuse to start auto mode if `multi_tool.codex` is not configured in `.wazir/state/config.json`. Error: "Auto mode requires an external reviewer (Codex). Configure it first or use guided mode."
+- **On escalate:** STOP immediately, write the escalation reason to `.wazir/runs/<id>/escalations/`, and wait for user input
+- **Wall-clock limit:** default 4 hours. If exceeded, stop with escalation.
+- **Never auto-commits to main** — always work on a feature branch
+- All checkpoints (AskUserQuestion) are skipped — gating agent evaluates phase reports and decides
+## `guided` mode (default)
+Current behavior — no changes needed. Checkpoints at phase boundaries, user approves before advancing.
+## `interactive` mode
+- **Clarifier:** asks more detailed questions, presents research findings with options: "I found 3 approaches — which interests you?"
+- **Executor:** checks approach before coding: "I'm about to implement auth with Supabase — sound right?"
+- **Reviewer:** discusses findings with the user rather than just presenting a verdict: "I found a potential auth bypass — here's why I think it's high severity, do you agree?"
+- Slower but highest quality for complex/ambiguous work
+## Mode checking in phase skills
+All phase skills check `interaction_mode` from run-config at every checkpoint:
+```
+# Read from run-config
+interaction_mode = run_config.interaction_mode ?? 'guided'
+# At each checkpoint:
+if interaction_mode == 'auto':
+    # Skip checkpoint, let gating agent decide
+elif interaction_mode == 'interactive':
+    # More detailed question, present options, discuss
+else:
+    # guided — standard checkpoint with AskUserQuestion
+```
+---
+# Two-Level Phase Model
+The pipeline has 4 top-level **phases**, each containing multiple **workflows** with review loops:
+```
+Phase 1: Init
+  └── (inline — no sub-workflows)
+Phase 2: Clarifier
+  ├── discover (research) ← research-review loop
+  ├── clarify ← clarification-review loop
+  ├── specify ← spec-challenge loop
+  ├── author (adaptive) ← approval gate
+  ├── design ← design-review loop
+  └── plan ← plan-review loop
+Phase 3: Executor
+  ├── execute (per-task) ← task-review loop per task
+  └── verify
+Phase 4: Final Review
+  ├── review (final) ← scored review
+  ├── learn
+  └── prepare_next
+```
+**Event capture uses both levels.** When emitting phase events, include `--parent-phase`:
+```bash
+wazir capture event --run <id> --event phase_enter --phase discover --parent-phase clarifier --status in_progress
+```
+**Progress markers between workflows:** After each workflow completes, output:
+> Phase 2: Clarifier > Workflow: specify (3 of 6 workflows complete)
+**`wazir status` shows both levels:** "Phase 2: Clarifier > Workflow: specify"
 ---
 # Phase 2: Clarifier
+**Before starting this phase, output to the user:**
+> **Clarifier Phase** — About to research your codebase, clarify requirements, harden the spec, brainstorm designs, and produce an execution plan.
+>
+> **Why this matters:** Without this, I'd guess your tech stack, misunderstand constraints, miss edge cases in the spec, and build the wrong architecture. Every ambiguity left unresolved here becomes a bug or rework cycle later.
+>
+> **Looking for:** Unstated assumptions, scope boundaries, conflicting requirements, missing acceptance criteria
 ```bash
 wazir capture event --run <run-id> --event phase_enter --phase clarifier --status in_progress
 ```
@@ -280,14 +411,43 @@ Each sub-workflow has its own review loop. User checkpoints between major steps.
 Output: approved spec + design + execution plan in `.wazir/runs/latest/clarified/`.
+**After completing this phase, output to the user:**
+> **Clarifier Phase complete.**
+>
+> **Found:** [N] ambiguities resolved, [N] assumptions made explicit, [N] scope boundaries drawn, [N] acceptance criteria hardened
+>
+> **Without this phase:** Requirements would be interpreted differently across tasks, acceptance criteria would be vague and untestable, the design would be ad-hoc, and the plan would miss dependency ordering
+>
+> **Changed because of this work:** [List spec tightening changes, resolved questions, design decisions, scope adjustments]
 ```bash
 wazir capture event --run <run-id> --event phase_exit --phase clarifier --status completed
 ```
+Run the phase report and display savings to the user:
+```bash
+wazir report phase --run <run-id> --phase clarifier
+wazir stats --run <run-id>
+```
+**Show savings in conversation output:**
+> **Context savings this phase:** Used wazir index for [N] queries and context-mode for [M] commands, saving ~[X] tokens ([Y]% reduction). Without these, this phase would have consumed [A] tokens instead of [B].
+Output the report content to the user in the conversation.
 ---
 # Phase 3: Executor
+**Before starting this phase, output to the user:**
+> **Executor Phase** — About to implement [N] tasks in dependency order with TDD (test-first), per-task code review, and verification before each commit.
+>
+> **Why this matters:** Without this discipline, tests get skipped, edge cases get missed, integration points break silently, and review catches problems too late when they're expensive to fix.
+>
+> **Looking for:** Correct dependency ordering, test coverage for each task, clean per-task review passes, no implementation drift from the approved plan
 ## Phase Gate (Hard Gate)
 Before entering the Executor phase, verify ALL clarifier artifacts exist:
@@ -328,14 +488,43 @@ Tasks always run sequentially.
 Output: code changes + verification proof in `.wazir/runs/latest/artifacts/`.
+**After completing this phase, output to the user:**
+> **Executor Phase complete.**
+>
+> **Found:** [N]/[N] tasks implemented, [N] tests written, [N] per-task review passes completed, [N] findings fixed before commit
+>
+> **Without this phase:** Code would ship without tests, review findings would accumulate until final review (10x more expensive to fix), and verification claims would be unsubstantiated
+>
+> **Changed because of this work:** [List of commits with conventional commit messages, test counts, verification evidence collected]
 ```bash
 wazir capture event --run <run-id> --event phase_exit --phase executor --status completed
 ```
+Run the phase report and display savings to the user:
+```bash
+wazir report phase --run <run-id> --phase executor
+wazir stats --run <run-id>
+```
+Output the report content to the user in the conversation.
+**Show savings in conversation output:**
+> **Context savings this phase:** Used wazir index for [N] queries and context-mode for [M] commands, saving ~[X] tokens ([Y]% reduction).
 ---
 # Phase 4: Final Review
+**Before starting this phase, output to the user:**
+> **Final Review Phase** — About to run an adversarial 7-dimension review comparing the implementation against your original input, extract durable learnings, and prepare the handoff.
+>
+> **Why this matters:** Without this, implementation drift ships undetected, missing acceptance criteria go unnoticed, untested code paths hide bugs, and the same mistakes repeat in the next run.
+>
+> **Looking for:** Spec violations, missing features, dead code paths, unsubstantiated claims, scope creep, security gaps, stale documentation
 ## Phase Gate (Hard Gate)
 Before entering the Final Review phase, verify the Executor produced its proof:
@@ -375,10 +564,27 @@ Prepare context and handoff for the next run:
 - Compress/archive unneeded files
 - Record what's left to do
+**After completing this phase, output to the user:**
+> **Final Review Phase complete.**
+>
+> **Found:** [N] findings across 7 dimensions, [N] blocking issues, [N] warnings, [N] learnings proposed for future runs
+>
+> **Without this phase:** Implementation drift from the original request would ship undetected, untested paths would hide production bugs, and recurring mistakes would never get captured as learnings
+>
+> **Changed because of this work:** [List of findings fixed, score achieved, learnings extracted, handoff prepared]
 ```bash
 wazir capture event --run <run-id> --event phase_exit --phase final_review --status completed
 ```
+Run the phase report and display it to the user:
+```bash
+wazir report phase --run <run-id> --phase final_review
+```
+Output the report content to the user in the conversation.
 ---
 ## Step 5: CHANGELOG + Gitflow Validation (Hard Gates)
@@ -399,26 +605,41 @@ After the reviewer completes, present verdict with numbered options:
 ### If PASS (score 56+):
 > **Result: PASS (score/70)**
->
-> 1. **Create a PR** (Recommended)
-> 2. **Merge directly**
-> 3. **Review the changes first**
+Ask the user via AskUserQuestion:
+- **Question:** "Pipeline passed. What would you like to do next?"
+- **Options:**
+  1. "Create a PR" *(Recommended)*
+  2. "Merge directly"
+  3. "Review the changes first"
+Wait for the user's selection before continuing.
 ### If NEEDS MINOR FIXES (score 42-55):
 > **Result: NEEDS MINOR FIXES (score/70)**
->
-> 1. **Auto-fix and re-review** (Recommended)
-> 2. **Fix manually**
-> 3. **Accept as-is**
+Ask the user via AskUserQuestion:
+- **Question:** "Minor issues found. How should we handle them?"
+- **Options:**
+  1. "Auto-fix and re-review" *(Recommended)*
+  2. "Fix manually"
+  3. "Accept as-is"
+Wait for the user's selection before continuing.
 ### If NEEDS REWORK (score 28-41):
 > **Result: NEEDS REWORK (score/70)**
->
-> 1. **Re-run affected tasks** (Recommended)
-> 2. **Review findings in detail**
-> 3. **Abandon this run**
+Ask the user via AskUserQuestion:
+- **Question:** "Significant issues found. How should we proceed?"
+- **Options:**
+  1. "Re-run affected tasks" *(Recommended)*
+  2. "Review findings in detail"
+  3. "Abandon this run"
+Wait for the user's selection before continuing.
 ### If FAIL (score 0-27):
@@ -438,10 +659,15 @@ wazir status --run <run-id> --json
 If any phase fails:
 > **Phase [name] failed: [reason]**
->
-> 1. **Retry this phase** (Recommended)
-> 2. **Skip and continue** (only if workflows within phase are adaptive)
-> 3. **Abort the run**
+Ask the user via AskUserQuestion:
+- **Question:** "Phase [name] failed: [reason]. How should we proceed?"
+- **Options:**
+  1. "Retry this phase" *(Recommended)*
+  2. "Skip and continue" *(only if workflows within phase are adaptive)*
+  3. "Abort the run"
+Wait for the user's selection before continuing.
 ---
@@ -455,9 +681,14 @@ Parse inline audit types: `/wazir audit security` → skip Question 1.
 After audit:
-> 1. **Review the findings** (Recommended)
-> 2. **Generate a fix plan**
-> 3. **Run the pipeline on the fix plan**
+Ask the user via AskUserQuestion:
+- **Question:** "Audit complete. What would you like to do with the findings?"
+- **Options:**
+  1. "Review the findings" *(Recommended)*
+  2. "Generate a fix plan"
+  3. "Run the pipeline on the fix plan"
+Wait for the user's selection before continuing.
 If option 3, save findings as briefing and run pipeline with intent = `bugfix`.
@@ -471,12 +702,47 @@ Generates a PRD from a completed run. Reads approved design, task specs, executi
 After generation:
-> 1. **Review the PRD** (Recommended)
-> 2. **Commit it**
-> 3. **Edit before committing**
+Ask the user via AskUserQuestion:
+- **Question:** "PRD generated. What would you like to do?"
+- **Options:**
+  1. "Review the PRD" *(Recommended)*
+  2. "Commit it"
+  3. "Edit before committing"
+Wait for the user's selection before continuing.
 ---
+## Reasoning Chain Output
+Every phase produces reasoning output at two layers:
+### Layer 1: Conversation Output (concise — for the user)
+Before each major decision, output one trigger sentence and one reasoning sentence:
+> "Your request mentions 'overnight autonomous run' — researching how Devin and Karpathy's autoresearch handle this, because unattended runs need different safety constraints than interactive ones."
+After each phase, output what was found and a counterfactual:
+> "Found: you use Supabase auth (not custom JWT). If I'd skipped research, I would have built JWT middleware — completely wrong."
+### Layer 2: File Output (detailed — for learning and reports)
+Save full reasoning chain to `.wazir/runs/<id>/reasoning/phase-<name>-reasoning.md` with entries:
+```markdown
+### Decision: [title]
+- **Trigger:** What prompted this decision
+- **Options considered:** List of alternatives
+- **Chosen:** The selected option
+- **Reasoning:** Why this option was chosen
+- **Confidence:** high | medium | low
+- **Counterfactual:** What would have gone wrong without this information
+```
+Create the `reasoning/` directory during run init. Every phase skill (clarifier, executor, reviewer) writes its own reasoning file. Counterfactuals appear in BOTH conversation output AND reasoning files.
 ## Interaction Rules
 - **One question at a time** — never combine multiple questions

package/tooling/src/capture/command.js CHANGED Viewed

@@ -3,6 +3,7 @@ import path from 'node:path';
 import { parseCommandOptions, parsePositiveInteger } from '../command-options.js';
 import { readYamlFile } from '../loaders.js';
+import { validateRunCompletion } from '../guards/phase-prerequisite-guard.js';
 import { findProjectRoot } from '../project-root.js';
 import { resolveStateRoot } from '../state-root.js';
 import {
@@ -57,7 +58,7 @@ function resolveCaptureContext(parsed, context = {}) {
   const projectRoot = findProjectRoot(context.cwd ?? process.cwd());
   const manifest = readYamlFile(path.join(projectRoot, 'wazir.manifest.yaml'));
   const { options } = parseCommandOptions(parsed.args, {
-    boolean: ['json'],
+    boolean: ['json', 'complete'],
     string: [
       'run',
       'phase',
@@ -326,6 +327,21 @@ function handleSummary(parsed, context = {}) {
   const runPaths = getRunPaths(stateRoot, options.run);
   const status = readStatus(runPaths);
+  // Enforce workflow completion before allowing summary to finalize
+  if (options.complete) {
+    const projectRoot = findProjectRoot();
+    const manifestPath = path.join(projectRoot, 'wazir.manifest.yaml');
+    const result = validateRunCompletion(runPaths.runRoot, manifestPath);
+    if (!result.complete) {
+      const msg = `Run incomplete: ${result.missing.length} workflow(s) not finished: ${result.missing.join(', ')}`;
+      if (options.json) {
+        return { exitCode: 1, stdout: JSON.stringify({ run_id: options.run, complete: false, missing_workflows: result.missing, error: msg }, null, 2) + '\n' };
+      }
+      return { exitCode: 1, stderr: msg + '\n' };
+    }
+  }
   const eventName = options.event ?? 'pre_compact_summary';
   const summaryContent = readInput();
   const summaryPath = writeSummary(runPaths, summaryContent);

package/tooling/src/capture/store.js CHANGED Viewed

@@ -116,6 +116,38 @@ export function readPhaseExitEvents(runPaths) {
   return completedPhases;
 }
+/**
+ * Read phase exit events with full two-level detail (parent_phase + workflow).
+ *
+ * Treats the file at runPaths.eventsPath as newline-delimited JSON and keeps
+ * every record whose event field equals phase_exit and whose phase field is
+ * truthy. Results are returned in file order. When parent_phase or workflow
+ * is null or undefined on a record, the phase value is used in its place.
+ *
+ * @param {object} runPaths - Run path descriptor; only eventsPath is read here.
+ * @returns {Array<object>} One { phase, parent_phase, workflow, status } entry
+ *   per matching event, or an empty array when the events file does not exist.
+ */
+export function readPhaseExitEventsDetailed(runPaths) {
+  if (!fs.existsSync(runPaths.eventsPath)) {
+    return [];
+  }
+  const content = fs.readFileSync(runPaths.eventsPath, 'utf8');
+  const events = [];
+  for (const line of content.split('\n')) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    try {
+      const event = JSON.parse(trimmed);
+      if (event.event === 'phase_exit' && event.phase) {
+        events.push({
+          phase: event.phase,
+          parent_phase: event.parent_phase ?? event.phase,
+          workflow: event.workflow ?? event.phase,
+          status: event.status,
+        });
+      }
+    } catch {
+      // Best-effort read: a line that cannot be processed as a JSON event is ignored instead of failing the whole read
+    }
+  }
+  return events;
+}
 export function writeSummary(runPaths, content) {
   ensureRunDirectories(runPaths);
   fs.writeFileSync(runPaths.summaryPath, content);

package/tooling/src/capture/user-input.js ADDED Viewed

@@ -0,0 +1,66 @@
+import fs from 'node:fs';
+import path from 'node:path';
+/**
+ * Append a user input entry to the run's NDJSON log.
+ *
+ * The record is written as one JSON object per line to
+ * user-input-log.ndjson inside runDir, stamped with the current time as an
+ * ISO string. Fields that are null or undefined fall back to:
+ * phase 'unknown', type 'instruction', content '' and context ''.
+ * The type value is stored as given; it is not checked against the list below.
+ * No directory is created here, and the second argument is destructured
+ * without a default, so callers must always pass an entry object.
+ *
+ * @param {string} runDir - Absolute path to the run directory
+ * @param {object} entry - { phase, type, content, context }
+ *   type: 'instruction' | 'approval' | 'correction' | 'rejection' | 'redirect'
+ * @returns {string} Path of the log file that was appended to
+ */
+export function captureUserInput(runDir, { phase, type, content, context }) {
+  const logPath = path.join(runDir, 'user-input-log.ndjson');
+  const record = {
+    timestamp: new Date().toISOString(),
+    phase: phase ?? 'unknown',
+    type: type ?? 'instruction',
+    content: content ?? '',
+    context: context ?? '',
+  };
+  fs.appendFileSync(logPath, JSON.stringify(record) + '\n');
+  return logPath;
+}
+/**
+ * Read all entries from a run's user input log.
+ *
+ * @param {string} runDir - Directory that holds user-input-log.ndjson
+ * @returns {Array} Parsed entries in file order, or an empty array when the
+ *   log file does not exist. Blank lines, lines that fail to parse as JSON,
+ *   and lines that parse to a falsy value are dropped.
+ */
+export function readUserInputLog(runDir) {
+  const logPath = path.join(runDir, 'user-input-log.ndjson');
+  if (!fs.existsSync(logPath)) return [];
+  return fs.readFileSync(logPath, 'utf8')
+    .split('\n')
+    .filter(line => line.trim())
+    .map(line => {
+      try { return JSON.parse(line); }
+      catch { return null; }
+    })
+    .filter(Boolean); // removes the null placeholders left by failed parses
+}
+/**
+ * Prune old user-input-log.ndjson files, keeping the most recent `keep` runs.
+ *
+ * Only directories directly under the runs folder of stateRoot whose names
+ * start with run- are considered. They are ordered by name, descending, and
+ * every entry from index keep onward has its user-input-log.ndjson deleted
+ * if that file exists. Run directories themselves are never removed.
+ *
+ * NOTE(review): recency is decided purely by directory name order, which
+ * presumably relies on run ids that sort chronologically - confirm against
+ * the run id format. The keep value is used directly as the starting index
+ * and is not validated here.
+ *
+ * @param {string} stateRoot - Absolute path to the state root (e.g. ~/.wazir/projects/foo)
+ * @param {number} keep - Number of recent runs to keep (default 10)
+ * @returns {{pruned: number}} Count of log files deleted; 0 when the runs
+ *   folder does not exist.
+ */
+export function pruneOldInputLogs(stateRoot, keep = 10) {
+  const runsDir = path.join(stateRoot, 'runs');
+  if (!fs.existsSync(runsDir)) return { pruned: 0 };
+  const entries = fs.readdirSync(runsDir)
+    .filter(name => name.startsWith('run-') && fs.statSync(path.join(runsDir, name)).isDirectory())
+    .sort()
+    .reverse();
+  let pruned = 0;
+  for (let i = keep; i < entries.length; i++) {
+    const logPath = path.join(runsDir, entries[i], 'user-input-log.ndjson');
+    if (fs.existsSync(logPath)) {
+      fs.unlinkSync(logPath);
+      pruned++;
+    }
+  }
+  return { pruned };
+}