npm - @prowi/deskcheck - Versions diffs - 0.3.0 → 0.4.0 - Mend

@prowi/deskcheck 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/README.md +78 -84
package/build/cli.js +129 -23
package/build/cli.js.map +1 -1
package/build/config/loader.d.ts.map +1 -1
package/build/config/loader.js +2 -1
package/build/config/loader.js.map +1 -1
package/build/config/types.d.ts +2 -1
package/build/config/types.d.ts.map +1 -1
package/build/mcp/tools.d.ts.map +1 -1
package/build/mcp/tools.js +25 -49
package/build/mcp/tools.js.map +1 -1
package/build/prompts/ExecutorPrompt.d.ts +4 -2
package/build/prompts/ExecutorPrompt.d.ts.map +1 -1
package/build/prompts/ExecutorPrompt.js +40 -33
package/build/prompts/ExecutorPrompt.js.map +1 -1
package/build/prompts/PartitionerPrompt.d.ts +12 -0
package/build/prompts/PartitionerPrompt.d.ts.map +1 -0
package/build/prompts/PartitionerPrompt.js +54 -0
package/build/prompts/PartitionerPrompt.js.map +1 -0
package/build/prompts/ResolverPrompt.d.ts +11 -0
package/build/prompts/ResolverPrompt.d.ts.map +1 -0
package/build/prompts/ResolverPrompt.js +45 -0
package/build/prompts/ResolverPrompt.js.map +1 -0
package/build/renderers/review/MarkdownRenderer.js +5 -2
package/build/renderers/review/MarkdownRenderer.js.map +1 -1
package/build/renderers/review/TerminalRenderer.js +5 -2
package/build/renderers/review/TerminalRenderer.js.map +1 -1
package/build/renderers/review/WatchRenderer.d.ts.map +1 -1
package/build/renderers/review/WatchRenderer.js +10 -1
package/build/renderers/review/WatchRenderer.js.map +1 -1
package/build/server/controllers/ReviewController.d.ts +12 -3
package/build/server/controllers/ReviewController.d.ts.map +1 -1
package/build/server/controllers/ReviewController.js +50 -6
package/build/server/controllers/ReviewController.js.map +1 -1
package/build/server/server.d.ts.map +1 -1
package/build/server/server.js +22 -1
package/build/server/server.js.map +1 -1
package/build/services/ExecutorService.d.ts +17 -2
package/build/services/ExecutorService.d.ts.map +1 -1
package/build/services/ExecutorService.js +37 -5
package/build/services/ExecutorService.js.map +1 -1
package/build/services/FindingsParserService.d.ts +1 -8
package/build/services/FindingsParserService.d.ts.map +1 -1
package/build/services/FindingsParserService.js +20 -45
package/build/services/FindingsParserService.js.map +1 -1
package/build/services/criteria/module-parser.d.ts +1 -1
package/build/services/criteria/module-parser.d.ts.map +1 -1
package/build/services/criteria/module-parser.js +20 -16
package/build/services/criteria/module-parser.js.map +1 -1
package/build/services/review/CodeSnippetService.d.ts +10 -0
package/build/services/review/CodeSnippetService.d.ts.map +1 -0
package/build/services/review/CodeSnippetService.js +54 -0
package/build/services/review/CodeSnippetService.js.map +1 -0
package/build/services/review/ReviewInputResolverService.d.ts +25 -0
package/build/services/review/ReviewInputResolverService.d.ts.map +1 -0
package/build/services/review/ReviewInputResolverService.js +106 -0
package/build/services/review/ReviewInputResolverService.js.map +1 -0
package/build/services/review/ReviewOrchestratorService.d.ts.map +1 -1
package/build/services/review/ReviewOrchestratorService.js +21 -20
package/build/services/review/ReviewOrchestratorService.js.map +1 -1
package/build/services/review/ReviewPartitionerService.d.ts +46 -0
package/build/services/review/ReviewPartitionerService.d.ts.map +1 -0
package/build/services/review/ReviewPartitionerService.js +208 -0
package/build/services/review/ReviewPartitionerService.js.map +1 -0
package/build/services/review/ReviewPlanBuilderService.d.ts +25 -7
package/build/services/review/ReviewPlanBuilderService.d.ts.map +1 -1
package/build/services/review/ReviewPlanBuilderService.js +88 -30
package/build/services/review/ReviewPlanBuilderService.js.map +1 -1
package/build/services/review/ReviewStorageService.d.ts +34 -10
package/build/services/review/ReviewStorageService.d.ts.map +1 -1
package/build/services/review/ReviewStorageService.js +100 -14
package/build/services/review/ReviewStorageService.js.map +1 -1
package/build/services/testing/TestRunnerService.d.ts.map +1 -1
package/build/services/testing/TestRunnerService.js +10 -8
package/build/services/testing/TestRunnerService.js.map +1 -1
package/build/types/criteria.d.ts +8 -6
package/build/types/criteria.d.ts.map +1 -1
package/build/types/review.d.ts +123 -28
package/build/types/review.d.ts.map +1 -1
package/package.json +3 -1
package/ui/dist/index.html +12 -63
package/build/prompts/PlannerPrompt.d.ts +0 -12
package/build/prompts/PlannerPrompt.d.ts.map +0 -1
package/build/prompts/PlannerPrompt.js +0 -34
package/build/prompts/PlannerPrompt.js.map +0 -1
package/build/services/review/ReviewContextExtractorService.d.ts +0 -17
package/build/services/review/ReviewContextExtractorService.d.ts.map +0 -1
package/build/services/review/ReviewContextExtractorService.js +0 -69
package/build/services/review/ReviewContextExtractorService.js.map +0 -1
package/build/services/review/ReviewPlannerService.d.ts +0 -29
package/build/services/review/ReviewPlannerService.d.ts.map +0 -1
package/build/services/review/ReviewPlannerService.js +0 -122
package/build/services/review/ReviewPlannerService.js.map +0 -1

package/README.md CHANGED

@@ -2,8 +2,6 @@
 Modular code review powered by Claude. Define what to check as markdown, deskcheck runs each check in a fresh AI agent, and aggregates the findings.
-![Run overview](docs/screenshots/run-overview.png)
 ## Why deskcheck?
 Traditional code review tools leave a gap:
@@ -12,11 +10,11 @@ Traditional code review tools leave a gap:
 - **Linters** verify syntax — they can't tell you "this endpoint is missing input validation"
 - **A single LLM** reviewing a whole branch suffers **context rot** — as its context fills up, it starts missing the patterns it's supposed to catch
-Deskcheck solves this by breaking every review into the smallest possible unit: **one file + one criterion + one fresh agent**. Each agent gets a clean context with only the code it needs and the specific rules to check. Results are aggregated mechanically.
+Deskcheck solves this by breaking every review into the smallest possible unit. A **partitioner agent** reads your criterion and decides how to split the matched files into focused subtasks. Each subtask runs in a **fresh reviewer agent** with clean context — only the code it needs and the specific rules to check. Results are aggregated mechanically.
 ```
-Your code + Criteria → N executor agents → Aggregated findings
-                        (fresh context each)
+Your code + Criteria → Partitioner → N reviewer agents → Aggregated findings
+                       (per-criterion)  (fresh context each)
 ```
 ## Quick Start
@@ -31,8 +29,8 @@ deskcheck init
 # Review your branch changes against main
 deskcheck diff main
-# Review a specific file
-deskcheck "src/services/PaymentService.ts"
+# Review with natural language
+deskcheck "review src/services/"
 # Open the web dashboard
 deskcheck serve
@@ -42,15 +40,15 @@ deskcheck serve
 ### 1. You define criteria as markdown
-Each criterion is a markdown file with YAML frontmatter that says **what to check**, **which files to check**, and **how important it is**:
+Each criterion is a markdown file with YAML frontmatter that says **what to check**, **which files to check**, and **how to partition the work**:
 ```yaml
 ---
 description: "Checks for common security vulnerabilities"
-severity: critical
 globs:
   - "src/**/*.ts"
   - "!src/**/*.test.ts"
+partition: one task per file
 model: sonnet
 ---
@@ -60,7 +58,7 @@ You are a security reviewer. Check for:
 2. **SQL injection** — string concatenation in database queries
 3. **Missing input validation** — user input used without sanitization
-For each issue, report the severity, file, line number, and a fix suggestion.
+For each issue, report the severity, file, line range, and a fix suggestion.
 ```
 Put criteria in `deskcheck/criteria/` — organize them however you like:
@@ -75,56 +73,52 @@ deskcheck/criteria/
     └── error-handling.md
 ```
-### 2. Deskcheck matches criteria to your files
-Each criterion has `globs` that define which files it applies to. When you run `deskcheck diff main`, it gets the list of changed files and matches them against every criterion's globs. Each match becomes a **task**: one file + one criterion.
-### 3. Each task runs in a fresh agent
+### 2. The pipeline runs in four steps
-Every task is executed by a new Claude agent with only:
-- The file content (or diff)
-- The criterion's instructions
-- Access to read tools (Read, Glob, Grep) for additional context
-No context leakage between tasks. A fresh agent reviewing one file against one set of rules catches issues with near-100% reliability.
+```
+Matching → Partitioning → Reviewing → Complete
+```
-### 4. Findings are aggregated
+1. **Matching** — files from `git diff` (or a natural-language file list) are matched against each criterion's glob patterns. Programmatic, no LLM.
+2. **Partitioning** — for each matched criterion, a fresh agent reads the `partition` instruction and splits the matched files into focused subtasks. The partitioner can inspect files with Read/Grep/Glob to make informed decisions (e.g. "one method per task" requires reading the file to list methods).
+3. **Reviewing** — each subtask runs in a fresh reviewer agent with only the criterion instructions, the assigned files, and the scope. Reviewers fetch their own context from disk (diffs for change-mode, full files for all-mode). Up to 5 run concurrently.
+4. **Complete** — issues are aggregated by file, criterion, and severity.
-Results are grouped by file, criterion, and severity. You can browse them in the terminal, as markdown (for PR comments), as JSON (for tooling), or in the **web dashboard**:
+### 3. Findings are aggregated
-![File detail view](docs/screenshots/file-detail.png)
+Results are grouped and viewable in the terminal, as markdown (for PR comments), as JSON (for tooling), or in the **web dashboard** (`deskcheck serve`). The dashboard shows a four-step pipeline bar, per-criterion subtask breakdowns, partitioner reasoning, and issue cards with code snippets and suggested fixes.
 ## CLI Commands
 ### `deskcheck diff [git-args...]`
-Deterministic review of git changes. No LLM planner — passes args directly to `git diff`.
+Deterministic review of git changes. No LLM resolver — scope and file list are derived directly from `git diff`.
 ```bash
-deskcheck diff main                                      # Changes vs main
-deskcheck diff --staged                                  # Staged changes only
-deskcheck diff HEAD~3                                    # Last 3 commits
-deskcheck diff main -- src/services/                     # Scoped to a directory
-deskcheck diff main --dry-run                            # Preview plan without executing
-deskcheck diff main --fail-on=critical                   # Exit 1 if critical findings (for CI)
-deskcheck diff main --format=markdown                    # Markdown output (for PR comments)
-deskcheck diff main --criteria=dto-enforcement           # Only run one criterion
-deskcheck diff main --criteria=security,naming           # Only run specific criteria
+deskcheck diff                          # Working tree vs HEAD (staged + unstaged)
+deskcheck diff main                     # Changes vs main
+deskcheck diff HEAD~3                   # Last 3 commits
+deskcheck diff main --dry-run           # Preview plan (runs partitioners) without executing reviewers
+deskcheck diff main --fail-on=critical  # Exit 1 if critical findings (for CI)
+deskcheck diff main --format=markdown   # Markdown output (for PR comments)
+deskcheck diff main --criteria=security # Only run one criterion
 ```
 ### `deskcheck "<prompt>"`
-Natural language review — an LLM agent interprets what you want to check.
+Natural-language review — a resolver agent interprets what you want to check and produces a `{ scope, files }` pair, then the same downstream pipeline runs.
 ```bash
-deskcheck "src/services/OrderService.ts"
+deskcheck "review src/services/"
 deskcheck "check the auth module"
-deskcheck "the calculate method in Commission.ts"
+deskcheck "review changes against develop"
+deskcheck "review src/" --scope changes:main     # Override resolver's scope inference
+deskcheck "review src/" --criteria=security       # Only run specific criteria
 ```
 ### `deskcheck serve`
-Web dashboard with live updates. Shows all runs, task progress, usage/cost tracking, and findings with filtering.
+Web dashboard with live updates via SSE. Four views: run list, review overview (pipeline + criteria + issues), criterion detail (partitioner reasoning + subtask list), and subtask detail (issue cards with code).
 ```bash
 deskcheck serve              # Start on default port (3000)
@@ -144,7 +138,7 @@ deskcheck show --fail-on=warning      # Exit 1 if warnings or worse
 ### `deskcheck watch [plan-id]`
-Live terminal tree view of a run in progress.
+Live terminal tree view of a run in progress. Shows partition decisions and per-subtask `[focus]` annotations.
 ### `deskcheck list`
@@ -160,32 +154,10 @@ deskcheck test controller-conventions             # Run tests for one criterion
 deskcheck test --criteria=dto-enforcement,naming  # Run tests for specific criteria
 ```
-Test fixtures live in `deskcheck/tests/` mirroring the criteria directory structure. Each test case has a fixture file (code to review) and an `expected.md` (what should be found). An LLM judge compares actual findings against expectations and produces scores:
-- **Recall** — Were expected violations found?
-- **Precision** — Were all findings legitimate?
-- **Scope compliance** — Did every finding come from the criterion's checklist?
-Results are persisted in `.deskcheck/test-runs/` for inspection.
 ### `deskcheck init`
 Scaffold config and criteria directory for a new project.
-## Web Dashboard
-Start with `deskcheck serve` and open `http://localhost:3000`.
-**Run overview** — progress bar, usage/cost tracking, sortable task table with severity filters, and file coverage:
-![Run overview](docs/screenshots/run-overview.png)
-**File detail** — click any file to see all findings across criteria, with severity filtering and grouping options:
-![File detail](docs/screenshots/file-detail.png)
-The dashboard uses SSE for live updates — watch tasks complete in real time during execution.
 ## Criterion Reference
 ### Frontmatter Fields
@@ -193,10 +165,10 @@ The dashboard uses SSE for live updates — watch tasks complete in real time du
 | Field | Required | Default | Description |
 |-------|----------|---------|-------------|
 | `description` | Yes | — | Human-readable description shown in reports |
-| `severity` | Yes | — | Importance: `critical`, `high`, `medium`, `low` |
 | `globs` | Yes | — | File patterns to match. Prefix with `!` to exclude |
-| `mode` | No | `"One task per file"` | How to split files into tasks (natural language) |
-| `model` | No | `"haiku"` | Claude model: `haiku`, `sonnet`, `opus` |
+| `partition` | No | `"one task per matched file"` | Natural-language instruction for how the partitioner agent should split matched files into subtasks |
+| `model` | No | `"haiku"` | Claude model for reviewer agents: `haiku`, `sonnet`, `opus` |
+| `tools` | No | `[]` | Extra tools available to reviewers for this criterion (e.g. `["WebFetch"]`), layered on top of built-ins |
 ### Choosing the Right Model
@@ -212,19 +184,32 @@ The dashboard uses SSE for live updates — watch tasks complete in real time du
 ### The Detective Prompt
-The markdown body below the frontmatter is the **detective prompt** — instructions given to each executor agent. Include:
+The markdown body below the frontmatter is the **detective prompt** — instructions given to each reviewer agent. Include:
 - **What to check** — specific patterns and violations
 - **What NOT to check** — exclusions to reduce false positives
 - **Severity guidance** — when to report critical vs warning vs info
-The agent has read access to the project, so your prompt can reference other files:
+Reviewers have built-in tools (Read, Grep, Glob, Bash) and fetch their own context based on the scope, so your prompt can reference other files:
 ```markdown
 Read `.eslintrc.js` to understand the project's linting config.
 Then check for architectural patterns that ESLint can't catch.
 ```
+### Partition Instruction
+The `partition` field tells the partitioner agent how to split matched files into subtasks. Examples:
+```yaml
+partition: one task per file               # Simple, default-like
+partition: one public method per task       # Sub-file: same file appears in multiple subtasks with different focus
+partition: group each test with its source  # Cross-file grouping
+partition: bundle all controllers together  # Single grouped review
+```
+The partitioner agent reads this instruction, inspects the matched files using its tools, and produces subtasks with `files`, optional `focus` (sub-file narrowing), and optional `hint` (reasoning for the grouping).
 ## Configuration
 Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
@@ -239,7 +224,8 @@ Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
     "mcp_servers": {}
   },
   "agents": {
-    "planner": { "model": "haiku" },
+    "resolver": { "model": "haiku" },
+    "partitioner": { "model": "haiku" },
     "executor": {},
     "evaluator": { "model": "haiku" },
     "judge": { "model": "opus" }
@@ -247,8 +233,10 @@ Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
 }
 ```
-- The **executor model** comes from each criterion's `model` field, not from config. This lets cheap checks use `haiku` and important checks use `sonnet`.
-- The **judge model** (used by `deskcheck test`) defaults to `opus` for accurate evaluation of findings against expectations.
+- **Built-in reviewer tools** (`Read`, `Grep`, `Glob`, `Bash`) are always available regardless of `shared.allowed_tools`. The config tools layer on top.
+- The **reviewer model** comes from each criterion's `model` field, not from config.
+- The **partitioner model** comes from `agents.partitioner.model` (shared across all criteria).
+- The **resolver model** (for natural-language `deskcheck "<prompt>"`) comes from `agents.resolver.model`.
 ## CI Integration
@@ -280,26 +268,36 @@ Deskcheck can run as an MCP server for Claude Code integration:
 }
 ```
-## Usage Tracking
+## Demo & Development
+### Seed fixtures for UI work (free, no API calls)
+```bash
+npm run seed -- --clean     # Write 5 synthetic plans exercising every UI state
+deskcheck serve             # http://localhost:3000
+```
+### Run a real review against the demo project (~5–15¢)
+```bash
+cd examples/demo-project
+git init -q && git add -A && git commit -qm init   # one-time setup
+deskcheck "review src/"                              # runs resolver + partitioners + reviewers
+```
-Every run tracks token usage and cost per task. The web dashboard shows totals (cost, input/output tokens) and per-task breakdowns, so you can see exactly how much each review costs and which criteria are most expensive.
+See [`examples/demo-project/README.md`](examples/demo-project/README.md) for the planted issues and expected findings.
-## Development
+### Development setup
 The fastest way to get started is with the included **Dev Container** (VS Code + Docker):
 1. Open the repo in VS Code
-2. When prompted, click **"Reopen in Container"** (or run `Dev Containers: Reopen in Container` from the command palette)
+2. When prompted, click **"Reopen in Container"**
 3. Press **Ctrl+Shift+B** to launch the dev environment
-This starts three processes in a single terminal group:
-- **Backend server** on port 3000 (builds TypeScript, then runs `deskcheck serve`)
-- **TypeScript watch** (`tsc --watch` for backend changes)
-- **Vite dev server** on port 5173 (Vue UI with hot reload)
-Open `http://localhost:5173` for UI development — API requests are proxied to the backend automatically.
+This starts backend server (port 3000), TypeScript watch, and Vite dev server (port 5173).
-### Without Dev Container
+Without Dev Container:
 ```bash
 # Terminal 1: backend
@@ -312,10 +310,6 @@ npm run dev
 cd ui && npm install && npm run dev
 ```
-## Disclaimer
-This tool was vibe-coded in a single day using [Claude Code](https://claude.ai/claude-code). The architecture, implementation, web UI, and even this README were built through conversation with Claude Opus 4.6. It works, we use it, but it hasn't been battle-tested at scale. Expect rough edges. Contributions welcome.
 ## License
 MIT

package/build/cli.js CHANGED

@@ -7,7 +7,8 @@ import { loadConfig, DEFAULT_CONFIG } from "./config/loader.js";
 import { ReviewStorageService } from "./services/review/ReviewStorageService.js";
 import { discoverModules, filterModules } from "./services/criteria/module-parser.js";
 import { buildPlanWithTasks } from "./services/review/ReviewPlanBuilderService.js";
-import { ReviewPlannerService } from "./services/review/ReviewPlannerService.js";
+import { ReviewInputResolverService } from "./services/review/ReviewInputResolverService.js";
+import { ReviewPartitionerService } from "./services/review/ReviewPartitionerService.js";
 import { ReviewOrchestratorService } from "./services/review/ReviewOrchestratorService.js";
 import { renderTerminal } from "./renderers/review/TerminalRenderer.js";
 import { renderMarkdown } from "./renderers/review/MarkdownRenderer.js";
@@ -32,6 +33,14 @@ const RESET = "\x1b[0m";
 function resolveProjectRoot() {
     return process.cwd();
 }
+/** Build the PlanInvocation snapshot for storage from the current process. */
+function captureInvocation(projectRoot) {
+    return {
+        command: "deskcheck",
+        args: process.argv.slice(2),
+        cwd: projectRoot,
+    };
+}
 function formatFindingsSummary(results) {
     const { critical, warning, info, total } = results.summary;
     if (total === 0)
@@ -219,9 +228,14 @@ async function diffCommand(gitArgs, options) {
     const config = loadConfig(projectRoot);
     const storageDir = path.join(projectRoot, config.storage_dir);
     const storage = new ReviewStorageService(storageDir);
-    // Get changed files via git diff
-    // Insert --name-only right after "diff" so it comes before any -- path separators
-    const gitDiffArgs = ["diff", "--name-only", ...gitArgs];
+    // Resolve the diff ref. The first positional (non-flag) arg becomes the ref;
+    // with no positional, default to HEAD. This is the same ref the reviewer
+    // will use later (`git diff <ref> -- <file>`), so file discovery and the
+    // reviewer's per-file diffs see the same baseline. Bare `deskcheck diff`
+    // therefore reviews working-tree-vs-HEAD = staged + unstaged combined.
+    const ref = gitArgs.find((a) => !a.startsWith("-")) ?? "HEAD";
+    const passthrough = gitArgs.filter((a) => a !== ref);
+    const gitDiffArgs = ["diff", "--name-only", ref, ...passthrough];
     let fileOutput;
     try {
         fileOutput = execFileSync("git", gitDiffArgs, {
@@ -247,12 +261,23 @@ async function diffCommand(gitArgs, options) {
         const patterns = options.criteria.split(",").map((s) => s.trim()).filter((s) => s.length > 0);
         modules = filterModules(modules, patterns);
     }
-    // Build a human-readable name from git args
-    const diffTarget = gitArgs.filter((a) => !a.startsWith("--")).join(" ") || "working tree";
-    const planName = `diff: ${diffTarget}`;
-    const sourceTarget = gitArgs[0] ?? "HEAD";
-    const source = { type: "diff", target: sourceTarget };
-    const plan = buildPlanWithTasks(storage, planName, source, files, modules);
+    // Build a human-readable plan name and the structured scope.
+    const planName = `diff: ${ref}`;
+    const scope = { type: "changes", ref };
+    const invocation = captureInvocation(projectRoot);
+    const partitioner = new ReviewPartitionerService(config, projectRoot);
+    const plan = await buildPlanWithTasks(storage, partitioner, planName, scope, invocation, files, modules, {
+        onMatchingComplete: (criteriaCount, fileCount) => {
+            console.log(`${DIM}  Matching: ${criteriaCount} criteria matched ${fileCount} file(s)${RESET}`);
+            if (criteriaCount > 0) {
+                console.log(`${DIM}  Partitioning...${RESET}`);
+            }
+        },
+        onPartitionCompleted: (decision) => {
+            const name = decision.review_id.split("/").pop() ?? decision.review_id;
+            console.log(`${DIM}    ${name}: ${decision.subtasks.length} subtask(s) from ${decision.matched_files.length} file(s)${RESET}`);
+        },
+    });
     printPlanSummary(plan);
     if (options.dryRun) {
         console.log(`${DIM}  Dry run — plan created but not executed.${RESET}`);
@@ -263,32 +288,113 @@ async function diffCommand(gitArgs, options) {
         console.log(`${DIM}  No criteria matched the changed files.${RESET}`);
         process.exit(0);
     }
-    // Execute
+    // Execute. If the orchestrator throws (per-task errors are handled
+    // internally and don't escape), stamp the failure on the plan first.
     const orchestrator = new ReviewOrchestratorService(config, projectRoot);
-    await executeAndPrint(orchestrator, plan.plan_id);
+    try {
+        await executeAndPrint(orchestrator, plan.plan_id);
+    }
+    catch (err) {
+        storage.setFailure(plan.plan_id, {
+            step: "reviewing",
+            review_id: null,
+            message: err instanceof Error ? err.message : String(err),
+        });
+        throw err;
+    }
     // Render results
     const finalPlan = storage.getPlan(plan.plan_id);
     const results = storage.getResults(plan.plan_id);
     console.log(renderOutput(results, finalPlan, options.format));
     process.exit(checkFailOn(results, options.failOn));
 }
-/** Default command — natural language deskcheck via LLM planner. */
+/**
+ * Parse the `--scope` flag value into a structured Scope.
+ *
+ * Accepted forms:
+ *   all                   → { type: "all" }
+ *   changes               → { type: "changes", ref: "HEAD" }
+ *   changes:<ref>         → { type: "changes", ref: "<ref>" }
+ */
+function parseScopeFlag(value) {
+    const trimmed = value.trim();
+    if (trimmed === "all")
+        return { type: "all" };
+    if (trimmed === "changes")
+        return { type: "changes", ref: "HEAD" };
+    if (trimmed.startsWith("changes:")) {
+        const ref = trimmed.slice("changes:".length).trim();
+        if (!ref)
+            throw new Error(`--scope changes: requires a ref (e.g. changes:main)`);
+        return { type: "changes", ref };
+    }
+    throw new Error(`Invalid --scope value: "${value}". Expected "all", "changes", or "changes:<ref>".`);
+}
+/** Default command — natural-language deskcheck via the input resolver agent. */
 async function deskchecCommand(prompt, options) {
     const projectRoot = resolveProjectRoot();
     const config = loadConfig(projectRoot);
     const storageDir = path.join(projectRoot, config.storage_dir);
-    console.log(`${DIM}Planning...${RESET}`);
+    const storage = new ReviewStorageService(storageDir);
+    const scopeOverride = options.scope ? parseScopeFlag(options.scope) : undefined;
     const criteriaFilter = options.criteria
         ? options.criteria.split(",").map((s) => s.trim()).filter((s) => s.length > 0)
         : undefined;
-    const planner = new ReviewPlannerService(config, projectRoot);
-    const plan = await planner.plan(prompt, criteriaFilter);
+    // Step 1: resolve { scope, files } from natural language.
+    console.log(`${DIM}Resolving...${RESET}`);
+    const resolver = new ReviewInputResolverService(config, projectRoot);
+    const { scope, files } = await resolver.resolve(prompt, scopeOverride);
+    // Step 2: discover and filter criteria (programmatic, no LLM).
+    const modulesDir = path.resolve(projectRoot, config.modules_dir);
+    let modules = discoverModules(modulesDir);
+    if (criteriaFilter) {
+        modules = filterModules(modules, criteriaFilter);
+    }
+    const invocation = captureInvocation(projectRoot);
+    // Empty file list → empty plan with a friendly message, exit clean.
+    if (files.length === 0) {
+        const emptyPlan = storage.createPlan(prompt, scope, invocation);
+        storage.setMatchedFiles(emptyPlan.plan_id, [], []);
+        storage.finalizePlan(emptyPlan.plan_id);
+        console.log("");
+        console.log(`${DIM}  No files matched the request. Nothing to review.${RESET}`);
+        console.log(`${DIM}  Plan ID: ${emptyPlan.plan_id}${RESET}`);
+        process.exit(0);
+    }
+    // Step 3: build the plan (glob match → partition → tasks).
+    const partitioner = new ReviewPartitionerService(config, projectRoot);
+    const plan = await buildPlanWithTasks(storage, partitioner, prompt, scope, invocation, files, modules, {
+        onMatchingComplete: (criteriaCount, fileCount) => {
+            console.log(`${DIM}  Matching: ${criteriaCount} criteria matched ${fileCount} file(s)${RESET}`);
+            if (criteriaCount > 0) {
+                console.log(`${DIM}  Partitioning...${RESET}`);
+            }
+        },
+        onPartitionCompleted: (decision) => {
+            const name = decision.review_id.split("/").pop() ?? decision.review_id;
+            console.log(`${DIM}    ${name}: ${decision.subtasks.length} subtask(s) from ${decision.matched_files.length} file(s)${RESET}`);
+        },
+    });
     printPlanSummary(plan);
-    // Execute
+    if (Object.keys(plan.tasks).length === 0) {
+        console.log(`${DIM}  No criteria matched the resolved files.${RESET}`);
+        process.exit(0);
+    }
+    // Step 4: execute reviewers. If the orchestrator throws, mark the plan
+    // as failed at the reviewing step before re-raising.
     const orchestrator = new ReviewOrchestratorService(config, projectRoot);
-    await executeAndPrint(orchestrator, plan.plan_id);
-    // Render results
-    const storage = new ReviewStorageService(storageDir);
+    try {
+        await executeAndPrint(orchestrator, plan.plan_id);
+    }
+    catch (err) {
+        storage.setFailure(plan.plan_id, {
+            step: "reviewing",
+            review_id: null,
+            message: err instanceof Error ? err.message : String(err),
+        });
+        throw err;
+    }
+    // Step 5: render.
     const finalPlan = storage.getPlan(plan.plan_id);
     const results = storage.getResults(plan.plan_id);
     console.log(renderTerminal(results, finalPlan));
@@ -405,12 +511,13 @@ const program = new Command();
 program
     .name("deskcheck")
     .description("Modular code deskcheck tool powered by Claude")
-    .version("0.1.0");
+    .version("0.4.0");
 // Default command: natural language deskcheck
 program
     .argument("[prompt]", "What to check (natural language)")
     .option("--fail-on <severities>", "Exit non-zero if findings match: critical, warning, info (comma-separated)")
     .option("--criteria <names>", "Only run specific criteria (comma-separated, e.g. dto-enforcement,controller-conventions)")
+    .option("--scope <value>", "Override resolver scope inference: 'all', 'changes', or 'changes:<ref>'")
     .action(async (prompt, options) => {
     if (!prompt) {
         program.help();
@@ -436,10 +543,9 @@ program
     .option("--criteria <names>", "Only run specific criteria (comma-separated, e.g. dto-enforcement,controller-conventions)")
     .addHelpText("after", `
 Examples:
+  deskcheck diff                      Check working tree vs HEAD (staged + unstaged)
   deskcheck diff develop              Check changes vs develop branch
-  deskcheck diff --staged             Check staged changes
   deskcheck diff HEAD~3               Check last 3 commits
-  deskcheck diff main -- app/         Check changes in app/ vs main
   deskcheck diff develop --dry-run    Show plan without executing
   deskcheck diff develop --fail-on=critical  Exit non-zero on critical findings
   deskcheck diff develop --criteria=dto-enforcement  Only run one criterion