@prowi/deskcheck 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +78 -84
  2. package/build/cli.js +129 -23
  3. package/build/cli.js.map +1 -1
  4. package/build/config/loader.d.ts.map +1 -1
  5. package/build/config/loader.js +2 -1
  6. package/build/config/loader.js.map +1 -1
  7. package/build/config/types.d.ts +2 -1
  8. package/build/config/types.d.ts.map +1 -1
  9. package/build/mcp/tools.d.ts.map +1 -1
  10. package/build/mcp/tools.js +25 -49
  11. package/build/mcp/tools.js.map +1 -1
  12. package/build/prompts/ExecutorPrompt.d.ts +4 -2
  13. package/build/prompts/ExecutorPrompt.d.ts.map +1 -1
  14. package/build/prompts/ExecutorPrompt.js +40 -33
  15. package/build/prompts/ExecutorPrompt.js.map +1 -1
  16. package/build/prompts/PartitionerPrompt.d.ts +12 -0
  17. package/build/prompts/PartitionerPrompt.d.ts.map +1 -0
  18. package/build/prompts/PartitionerPrompt.js +54 -0
  19. package/build/prompts/PartitionerPrompt.js.map +1 -0
  20. package/build/prompts/ResolverPrompt.d.ts +11 -0
  21. package/build/prompts/ResolverPrompt.d.ts.map +1 -0
  22. package/build/prompts/ResolverPrompt.js +45 -0
  23. package/build/prompts/ResolverPrompt.js.map +1 -0
  24. package/build/renderers/review/MarkdownRenderer.js +5 -2
  25. package/build/renderers/review/MarkdownRenderer.js.map +1 -1
  26. package/build/renderers/review/TerminalRenderer.js +5 -2
  27. package/build/renderers/review/TerminalRenderer.js.map +1 -1
  28. package/build/renderers/review/WatchRenderer.d.ts.map +1 -1
  29. package/build/renderers/review/WatchRenderer.js +10 -1
  30. package/build/renderers/review/WatchRenderer.js.map +1 -1
  31. package/build/server/controllers/ReviewController.d.ts +12 -3
  32. package/build/server/controllers/ReviewController.d.ts.map +1 -1
  33. package/build/server/controllers/ReviewController.js +50 -6
  34. package/build/server/controllers/ReviewController.js.map +1 -1
  35. package/build/server/server.d.ts.map +1 -1
  36. package/build/server/server.js +22 -1
  37. package/build/server/server.js.map +1 -1
  38. package/build/services/ExecutorService.d.ts +17 -2
  39. package/build/services/ExecutorService.d.ts.map +1 -1
  40. package/build/services/ExecutorService.js +37 -5
  41. package/build/services/ExecutorService.js.map +1 -1
  42. package/build/services/FindingsParserService.d.ts +1 -8
  43. package/build/services/FindingsParserService.d.ts.map +1 -1
  44. package/build/services/FindingsParserService.js +20 -45
  45. package/build/services/FindingsParserService.js.map +1 -1
  46. package/build/services/criteria/module-parser.d.ts +1 -1
  47. package/build/services/criteria/module-parser.d.ts.map +1 -1
  48. package/build/services/criteria/module-parser.js +20 -16
  49. package/build/services/criteria/module-parser.js.map +1 -1
  50. package/build/services/review/CodeSnippetService.d.ts +10 -0
  51. package/build/services/review/CodeSnippetService.d.ts.map +1 -0
  52. package/build/services/review/CodeSnippetService.js +54 -0
  53. package/build/services/review/CodeSnippetService.js.map +1 -0
  54. package/build/services/review/ReviewInputResolverService.d.ts +25 -0
  55. package/build/services/review/ReviewInputResolverService.d.ts.map +1 -0
  56. package/build/services/review/ReviewInputResolverService.js +106 -0
  57. package/build/services/review/ReviewInputResolverService.js.map +1 -0
  58. package/build/services/review/ReviewOrchestratorService.d.ts.map +1 -1
  59. package/build/services/review/ReviewOrchestratorService.js +21 -20
  60. package/build/services/review/ReviewOrchestratorService.js.map +1 -1
  61. package/build/services/review/ReviewPartitionerService.d.ts +46 -0
  62. package/build/services/review/ReviewPartitionerService.d.ts.map +1 -0
  63. package/build/services/review/ReviewPartitionerService.js +208 -0
  64. package/build/services/review/ReviewPartitionerService.js.map +1 -0
  65. package/build/services/review/ReviewPlanBuilderService.d.ts +25 -7
  66. package/build/services/review/ReviewPlanBuilderService.d.ts.map +1 -1
  67. package/build/services/review/ReviewPlanBuilderService.js +88 -30
  68. package/build/services/review/ReviewPlanBuilderService.js.map +1 -1
  69. package/build/services/review/ReviewStorageService.d.ts +34 -10
  70. package/build/services/review/ReviewStorageService.d.ts.map +1 -1
  71. package/build/services/review/ReviewStorageService.js +100 -14
  72. package/build/services/review/ReviewStorageService.js.map +1 -1
  73. package/build/services/testing/TestRunnerService.d.ts.map +1 -1
  74. package/build/services/testing/TestRunnerService.js +10 -8
  75. package/build/services/testing/TestRunnerService.js.map +1 -1
  76. package/build/types/criteria.d.ts +8 -6
  77. package/build/types/criteria.d.ts.map +1 -1
  78. package/build/types/review.d.ts +123 -28
  79. package/build/types/review.d.ts.map +1 -1
  80. package/package.json +3 -1
  81. package/ui/dist/index.html +12 -63
  82. package/build/prompts/PlannerPrompt.d.ts +0 -12
  83. package/build/prompts/PlannerPrompt.d.ts.map +0 -1
  84. package/build/prompts/PlannerPrompt.js +0 -34
  85. package/build/prompts/PlannerPrompt.js.map +0 -1
  86. package/build/services/review/ReviewContextExtractorService.d.ts +0 -17
  87. package/build/services/review/ReviewContextExtractorService.d.ts.map +0 -1
  88. package/build/services/review/ReviewContextExtractorService.js +0 -69
  89. package/build/services/review/ReviewContextExtractorService.js.map +0 -1
  90. package/build/services/review/ReviewPlannerService.d.ts +0 -29
  91. package/build/services/review/ReviewPlannerService.d.ts.map +0 -1
  92. package/build/services/review/ReviewPlannerService.js +0 -122
  93. package/build/services/review/ReviewPlannerService.js.map +0 -1
package/README.md CHANGED
@@ -2,8 +2,6 @@
2
2
 
3
3
  Modular code review powered by Claude. Define what to check as markdown, deskcheck runs each check in a fresh AI agent, and aggregates the findings.
4
4
 
5
- ![Run overview](docs/screenshots/run-overview.png)
6
-
7
5
  ## Why deskcheck?
8
6
 
9
7
  Traditional code review tools leave a gap:
@@ -12,11 +10,11 @@ Traditional code review tools leave a gap:
12
10
  - **Linters** verify syntax — they can't tell you "this endpoint is missing input validation"
13
11
  - **A single LLM** reviewing a whole branch suffers **context rot** — as its context fills up, it starts missing the patterns it's supposed to catch
14
12
 
15
- Deskcheck solves this by breaking every review into the smallest possible unit: **one file + one criterion + one fresh agent**. Each agent gets a clean context with only the code it needs and the specific rules to check. Results are aggregated mechanically.
13
+ Deskcheck solves this by breaking every review into the smallest possible unit. A **partitioner agent** reads your criterion and decides how to split the matched files into focused subtasks. Each subtask runs in a **fresh reviewer agent** with clean context — only the code it needs and the specific rules to check. Results are aggregated mechanically.
16
14
 
17
15
  ```
18
- Your code + Criteria → N executor agents → Aggregated findings
19
- (fresh context each)
16
+ Your code + Criteria → Partitioner → N reviewer agents → Aggregated findings
17
+ (per-criterion) (fresh context each)
20
18
  ```
21
19
 
22
20
  ## Quick Start
@@ -31,8 +29,8 @@ deskcheck init
31
29
  # Review your branch changes against main
32
30
  deskcheck diff main
33
31
 
34
- # Review a specific file
35
- deskcheck "src/services/PaymentService.ts"
32
+ # Review with natural language
33
+ deskcheck "review src/services/"
36
34
 
37
35
  # Open the web dashboard
38
36
  deskcheck serve
@@ -42,15 +40,15 @@ deskcheck serve
42
40
 
43
41
  ### 1. You define criteria as markdown
44
42
 
45
- Each criterion is a markdown file with YAML frontmatter that says **what to check**, **which files to check**, and **how important it is**:
43
+ Each criterion is a markdown file with YAML frontmatter that says **what to check**, **which files to check**, and **how to partition the work**:
46
44
 
47
45
  ```yaml
48
46
  ---
49
47
  description: "Checks for common security vulnerabilities"
50
- severity: critical
51
48
  globs:
52
49
  - "src/**/*.ts"
53
50
  - "!src/**/*.test.ts"
51
+ partition: one task per file
54
52
  model: sonnet
55
53
  ---
56
54
 
@@ -60,7 +58,7 @@ You are a security reviewer. Check for:
60
58
  2. **SQL injection** — string concatenation in database queries
61
59
  3. **Missing input validation** — user input used without sanitization
62
60
 
63
- For each issue, report the severity, file, line number, and a fix suggestion.
61
+ For each issue, report the severity, file, line range, and a fix suggestion.
64
62
  ```
65
63
 
66
64
  Put criteria in `deskcheck/criteria/` — organize them however you like:
@@ -75,56 +73,52 @@ deskcheck/criteria/
75
73
  └── error-handling.md
76
74
  ```
77
75
 
78
- ### 2. Deskcheck matches criteria to your files
79
-
80
- Each criterion has `globs` that define which files it applies to. When you run `deskcheck diff main`, it gets the list of changed files and matches them against every criterion's globs. Each match becomes a **task**: one file + one criterion.
81
-
82
- ### 3. Each task runs in a fresh agent
76
+ ### 2. The pipeline runs in four steps
83
77
 
84
- Every task is executed by a new Claude agent with only:
85
- - The file content (or diff)
86
- - The criterion's instructions
87
- - Access to read tools (Read, Glob, Grep) for additional context
88
-
89
- No context leakage between tasks. A fresh agent reviewing one file against one set of rules catches issues with near-100% reliability.
78
+ ```
79
+ Matching → Partitioning → Reviewing → Complete
80
+ ```
90
81
 
91
- ### 4. Findings are aggregated
82
+ 1. **Matching** — files from `git diff` (or a natural-language file list) are matched against each criterion's glob patterns. Programmatic, no LLM.
83
+ 2. **Partitioning** — for each matched criterion, a fresh agent reads the `partition` instruction and splits the matched files into focused subtasks. The partitioner can inspect files with Read/Grep/Glob to make informed decisions (e.g. "one method per task" requires reading the file to list methods).
84
+ 3. **Reviewing** — each subtask runs in a fresh reviewer agent with only the criterion instructions, the assigned files, and the scope. Reviewers fetch their own context from disk (diffs for change-mode, full files for all-mode). Up to 5 run concurrently.
85
+ 4. **Complete** — issues are aggregated by file, criterion, and severity.
92
86
 
93
- Results are grouped by file, criterion, and severity. You can browse them in the terminal, as markdown (for PR comments), as JSON (for tooling), or in the **web dashboard**:
87
+ ### 3. Findings are aggregated
94
88
 
95
- ![File detail view](docs/screenshots/file-detail.png)
89
+ Results are grouped and viewable in the terminal, as markdown (for PR comments), as JSON (for tooling), or in the **web dashboard** (`deskcheck serve`). The dashboard shows a four-step pipeline bar, per-criterion subtask breakdowns, partitioner reasoning, and issue cards with code snippets and suggested fixes.
96
90
 
97
91
  ## CLI Commands
98
92
 
99
93
  ### `deskcheck diff [git-args...]`
100
94
 
101
- Deterministic review of git changes. No LLM planner — passes args directly to `git diff`.
95
+ Deterministic review of git changes. No LLM resolver — scope and file list are derived directly from `git diff`.
102
96
 
103
97
  ```bash
104
- deskcheck diff main # Changes vs main
105
- deskcheck diff --staged # Staged changes only
106
- deskcheck diff HEAD~3 # Last 3 commits
107
- deskcheck diff main -- src/services/ # Scoped to a directory
108
- deskcheck diff main --dry-run # Preview plan without executing
109
- deskcheck diff main --fail-on=critical # Exit 1 if critical findings (for CI)
110
- deskcheck diff main --format=markdown # Markdown output (for PR comments)
111
- deskcheck diff main --criteria=dto-enforcement # Only run one criterion
112
- deskcheck diff main --criteria=security,naming # Only run specific criteria
98
+ deskcheck diff # Working tree vs HEAD (staged + unstaged)
99
+ deskcheck diff main # Changes vs main
100
+ deskcheck diff HEAD~3 # Last 3 commits
101
+ deskcheck diff main --dry-run # Preview plan (runs partitioners) without executing reviewers
102
+ deskcheck diff main --fail-on=critical # Exit 1 if critical findings (for CI)
103
+ deskcheck diff main --format=markdown # Markdown output (for PR comments)
104
+ deskcheck diff main --criteria=security # Only run one criterion
113
105
  ```
114
106
 
115
107
  ### `deskcheck "<prompt>"`
116
108
 
117
- Natural language review — an LLM agent interprets what you want to check.
109
+ Natural-language review — a resolver agent interprets what you want to check and produces a `{ scope, files }` pair, then the same downstream pipeline runs.
118
110
 
119
111
  ```bash
120
- deskcheck "src/services/OrderService.ts"
112
+ deskcheck "review src/services/"
121
113
  deskcheck "check the auth module"
122
- deskcheck "the calculate method in Commission.ts"
114
+ deskcheck "review changes against develop"
115
+ deskcheck "review src/" --scope changes:main # Override resolver's scope inference
116
+ deskcheck "review src/" --criteria=security # Only run specific criteria
123
117
  ```
124
118
 
125
119
  ### `deskcheck serve`
126
120
 
127
- Web dashboard with live updates. Shows all runs, task progress, usage/cost tracking, and findings with filtering.
121
+ Web dashboard with live updates via SSE. Four views: run list, review overview (pipeline + criteria + issues), criterion detail (partitioner reasoning + subtask list), and subtask detail (issue cards with code).
128
122
 
129
123
  ```bash
130
124
  deskcheck serve # Start on default port (3000)
@@ -144,7 +138,7 @@ deskcheck show --fail-on=warning # Exit 1 if warnings or worse
144
138
 
145
139
  ### `deskcheck watch [plan-id]`
146
140
 
147
- Live terminal tree view of a run in progress.
141
+ Live terminal tree view of a run in progress. Shows partition decisions and per-subtask `[focus]` annotations.
148
142
 
149
143
  ### `deskcheck list`
150
144
 
@@ -160,32 +154,10 @@ deskcheck test controller-conventions # Run tests for one criterion
160
154
  deskcheck test --criteria=dto-enforcement,naming # Run tests for specific criteria
161
155
  ```
162
156
 
163
- Test fixtures live in `deskcheck/tests/` mirroring the criteria directory structure. Each test case has a fixture file (code to review) and an `expected.md` (what should be found). An LLM judge compares actual findings against expectations and produces scores:
164
-
165
- - **Recall** — Were expected violations found?
166
- - **Precision** — Were all findings legitimate?
167
- - **Scope compliance** — Did every finding come from the criterion's checklist?
168
-
169
- Results are persisted in `.deskcheck/test-runs/` for inspection.
170
-
171
157
  ### `deskcheck init`
172
158
 
173
159
  Scaffold config and criteria directory for a new project.
174
160
 
175
- ## Web Dashboard
176
-
177
- Start with `deskcheck serve` and open `http://localhost:3000`.
178
-
179
- **Run overview** — progress bar, usage/cost tracking, sortable task table with severity filters, and file coverage:
180
-
181
- ![Run overview](docs/screenshots/run-overview.png)
182
-
183
- **File detail** — click any file to see all findings across criteria, with severity filtering and grouping options:
184
-
185
- ![File detail](docs/screenshots/file-detail.png)
186
-
187
- The dashboard uses SSE for live updates — watch tasks complete in real time during execution.
188
-
189
161
  ## Criterion Reference
190
162
 
191
163
  ### Frontmatter Fields
@@ -193,10 +165,10 @@ The dashboard uses SSE for live updates — watch tasks complete in real time du
193
165
  | Field | Required | Default | Description |
194
166
  |-------|----------|---------|-------------|
195
167
  | `description` | Yes | — | Human-readable description shown in reports |
196
- | `severity` | Yes | — | Importance: `critical`, `high`, `medium`, `low` |
197
168
  | `globs` | Yes | — | File patterns to match. Prefix with `!` to exclude |
198
- | `mode` | No | `"One task per file"` | How to split files into tasks (natural language) |
199
- | `model` | No | `"haiku"` | Claude model: `haiku`, `sonnet`, `opus` |
169
+ | `partition` | No | `"one task per matched file"` | Natural-language instruction for how the partitioner agent should split matched files into subtasks |
170
+ | `model` | No | `"haiku"` | Claude model for reviewer agents: `haiku`, `sonnet`, `opus` |
171
+ | `tools` | No | `[]` | Extra tools available to reviewers for this criterion (e.g. `["WebFetch"]`), layered on top of built-ins |
200
172
 
201
173
  ### Choosing the Right Model
202
174
 
@@ -212,19 +184,32 @@ The dashboard uses SSE for live updates — watch tasks complete in real time du
212
184
 
213
185
  ### The Detective Prompt
214
186
 
215
- The markdown body below the frontmatter is the **detective prompt** — instructions given to each executor agent. Include:
187
+ The markdown body below the frontmatter is the **detective prompt** — instructions given to each reviewer agent. Include:
216
188
 
217
189
  - **What to check** — specific patterns and violations
218
190
  - **What NOT to check** — exclusions to reduce false positives
219
191
  - **Severity guidance** — when to report critical vs warning vs info
220
192
 
221
- The agent has read access to the project, so your prompt can reference other files:
193
+ Reviewers have built-in tools (Read, Grep, Glob, Bash) and fetch their own context based on the scope, so your prompt can reference other files:
222
194
 
223
195
  ```markdown
224
196
  Read `.eslintrc.js` to understand the project's linting config.
225
197
  Then check for architectural patterns that ESLint can't catch.
226
198
  ```
227
199
 
200
+ ### Partition Instruction
201
+
202
+ The `partition` field tells the partitioner agent how to split matched files into subtasks. Examples:
203
+
204
+ ```yaml
205
+ partition: one task per file # Simple, default-like
206
+ partition: one public method per task # Sub-file: same file appears in multiple subtasks with different focus
207
+ partition: group each test with its source # Cross-file grouping
208
+ partition: bundle all controllers together # Single grouped review
209
+ ```
210
+
211
+ The partitioner agent reads this instruction, inspects the matched files using its tools, and produces subtasks with `files`, optional `focus` (sub-file narrowing), and optional `hint` (reasoning for the grouping).
212
+
228
213
  ## Configuration
229
214
 
230
215
  Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
@@ -239,7 +224,8 @@ Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
239
224
  "mcp_servers": {}
240
225
  },
241
226
  "agents": {
242
- "planner": { "model": "haiku" },
227
+ "resolver": { "model": "haiku" },
228
+ "partitioner": { "model": "haiku" },
243
229
  "executor": {},
244
230
  "evaluator": { "model": "haiku" },
245
231
  "judge": { "model": "opus" }
@@ -247,8 +233,10 @@ Configuration lives in `.deskcheck/config.json` (created by `deskcheck init`):
247
233
  }
248
234
  ```
249
235
 
250
- - The **executor model** comes from each criterion's `model` field, not from config. This lets cheap checks use `haiku` and important checks use `sonnet`.
251
- - The **judge model** (used by `deskcheck test`) defaults to `opus` for accurate evaluation of findings against expectations.
236
+ - **Built-in reviewer tools** (`Read`, `Grep`, `Glob`, `Bash`) are always available regardless of `shared.allowed_tools`. The config tools layer on top.
237
+ - The **reviewer model** comes from each criterion's `model` field, not from config.
238
+ - The **partitioner model** comes from `agents.partitioner.model` (shared across all criteria).
239
+ - The **resolver model** (for natural-language `deskcheck "<prompt>"`) comes from `agents.resolver.model`.
252
240
 
253
241
  ## CI Integration
254
242
 
@@ -280,26 +268,36 @@ Deskcheck can run as an MCP server for Claude Code integration:
280
268
  }
281
269
  ```
282
270
 
283
- ## Usage Tracking
271
+ ## Demo & Development
272
+
273
+ ### Seed fixtures for UI work (free, no API calls)
274
+
275
+ ```bash
276
+ npm run seed -- --clean # Write 5 synthetic plans exercising every UI state
277
+ deskcheck serve # http://localhost:3000
278
+ ```
279
+
280
+ ### Run a real review against the demo project (~5–15¢)
281
+
282
+ ```bash
283
+ cd examples/demo-project
284
+ git init -q && git add -A && git commit -qm init # one-time setup
285
+ deskcheck "review src/" # runs resolver + partitioners + reviewers
286
+ ```
284
287
 
285
- Every run tracks token usage and cost per task. The web dashboard shows totals (cost, input/output tokens) and per-task breakdowns, so you can see exactly how much each review costs and which criteria are most expensive.
288
+ See [`examples/demo-project/README.md`](examples/demo-project/README.md) for the planted issues and expected findings.
286
289
 
287
- ## Development
290
+ ### Development setup
288
291
 
289
292
  The fastest way to get started is with the included **Dev Container** (VS Code + Docker):
290
293
 
291
294
  1. Open the repo in VS Code
292
- 2. When prompted, click **"Reopen in Container"** (or run `Dev Containers: Reopen in Container` from the command palette)
295
+ 2. When prompted, click **"Reopen in Container"**
293
296
  3. Press **Ctrl+Shift+B** to launch the dev environment
294
297
 
295
- This starts three processes in a single terminal group:
296
- - **Backend server** on port 3000 (builds TypeScript, then runs `deskcheck serve`)
297
- - **TypeScript watch** (`tsc --watch` for backend changes)
298
- - **Vite dev server** on port 5173 (Vue UI with hot reload)
299
-
300
- Open `http://localhost:5173` for UI development — API requests are proxied to the backend automatically.
298
+ This starts backend server (port 3000), TypeScript watch, and Vite dev server (port 5173).
301
299
 
302
- ### Without Dev Container
300
+ Without Dev Container:
303
301
 
304
302
  ```bash
305
303
  # Terminal 1: backend
@@ -312,10 +310,6 @@ npm run dev
312
310
  cd ui && npm install && npm run dev
313
311
  ```
314
312
 
315
- ## Disclaimer
316
-
317
- This tool was vibe-coded in a single day using [Claude Code](https://claude.ai/claude-code). The architecture, implementation, web UI, and even this README were built through conversation with Claude Opus 4.6. It works, we use it, but it hasn't been battle-tested at scale. Expect rough edges. Contributions welcome.
318
-
319
313
  ## License
320
314
 
321
315
  MIT
package/build/cli.js CHANGED
@@ -7,7 +7,8 @@ import { loadConfig, DEFAULT_CONFIG } from "./config/loader.js";
7
7
  import { ReviewStorageService } from "./services/review/ReviewStorageService.js";
8
8
  import { discoverModules, filterModules } from "./services/criteria/module-parser.js";
9
9
  import { buildPlanWithTasks } from "./services/review/ReviewPlanBuilderService.js";
10
- import { ReviewPlannerService } from "./services/review/ReviewPlannerService.js";
10
+ import { ReviewInputResolverService } from "./services/review/ReviewInputResolverService.js";
11
+ import { ReviewPartitionerService } from "./services/review/ReviewPartitionerService.js";
11
12
  import { ReviewOrchestratorService } from "./services/review/ReviewOrchestratorService.js";
12
13
  import { renderTerminal } from "./renderers/review/TerminalRenderer.js";
13
14
  import { renderMarkdown } from "./renderers/review/MarkdownRenderer.js";
@@ -32,6 +33,14 @@ const RESET = "\x1b[0m";
32
33
  function resolveProjectRoot() {
33
34
  return process.cwd();
34
35
  }
36
+ /** Build the PlanInvocation snapshot for storage from the current process. */
37
+ function captureInvocation(projectRoot) {
38
+ return {
39
+ command: "deskcheck",
40
+ args: process.argv.slice(2),
41
+ cwd: projectRoot,
42
+ };
43
+ }
35
44
  function formatFindingsSummary(results) {
36
45
  const { critical, warning, info, total } = results.summary;
37
46
  if (total === 0)
@@ -219,9 +228,14 @@ async function diffCommand(gitArgs, options) {
219
228
  const config = loadConfig(projectRoot);
220
229
  const storageDir = path.join(projectRoot, config.storage_dir);
221
230
  const storage = new ReviewStorageService(storageDir);
222
- // Get changed files via git diff
223
- // Insert --name-only right after "diff" so it comes before any -- path separators
224
- const gitDiffArgs = ["diff", "--name-only", ...gitArgs];
231
+ // Resolve the diff ref. The first positional (non-flag) arg becomes the ref;
232
+ // with no positional, default to HEAD. This is the same ref the reviewer
233
+ // will use later (`git diff <ref> -- <file>`), so file discovery and the
234
+ // reviewer's per-file diffs see the same baseline. Bare `deskcheck diff`
235
+ // therefore reviews working-tree-vs-HEAD = staged + unstaged combined.
236
+ const ref = gitArgs.find((a) => !a.startsWith("-")) ?? "HEAD";
237
+ const passthrough = gitArgs.filter((a) => a !== ref);
238
+ const gitDiffArgs = ["diff", "--name-only", ref, ...passthrough];
225
239
  let fileOutput;
226
240
  try {
227
241
  fileOutput = execFileSync("git", gitDiffArgs, {
@@ -247,12 +261,23 @@ async function diffCommand(gitArgs, options) {
247
261
  const patterns = options.criteria.split(",").map((s) => s.trim()).filter((s) => s.length > 0);
248
262
  modules = filterModules(modules, patterns);
249
263
  }
250
- // Build a human-readable name from git args
251
- const diffTarget = gitArgs.filter((a) => !a.startsWith("--")).join(" ") || "working tree";
252
- const planName = `diff: ${diffTarget}`;
253
- const sourceTarget = gitArgs[0] ?? "HEAD";
254
- const source = { type: "diff", target: sourceTarget };
255
- const plan = buildPlanWithTasks(storage, planName, source, files, modules);
264
+ // Build a human-readable plan name and the structured scope.
265
+ const planName = `diff: ${ref}`;
266
+ const scope = { type: "changes", ref };
267
+ const invocation = captureInvocation(projectRoot);
268
+ const partitioner = new ReviewPartitionerService(config, projectRoot);
269
+ const plan = await buildPlanWithTasks(storage, partitioner, planName, scope, invocation, files, modules, {
270
+ onMatchingComplete: (criteriaCount, fileCount) => {
271
+ console.log(`${DIM} Matching: ${criteriaCount} criteria matched ${fileCount} file(s)${RESET}`);
272
+ if (criteriaCount > 0) {
273
+ console.log(`${DIM} Partitioning...${RESET}`);
274
+ }
275
+ },
276
+ onPartitionCompleted: (decision) => {
277
+ const name = decision.review_id.split("/").pop() ?? decision.review_id;
278
+ console.log(`${DIM} ${name}: ${decision.subtasks.length} subtask(s) from ${decision.matched_files.length} file(s)${RESET}`);
279
+ },
280
+ });
256
281
  printPlanSummary(plan);
257
282
  if (options.dryRun) {
258
283
  console.log(`${DIM} Dry run — plan created but not executed.${RESET}`);
@@ -263,32 +288,113 @@ async function diffCommand(gitArgs, options) {
263
288
  console.log(`${DIM} No criteria matched the changed files.${RESET}`);
264
289
  process.exit(0);
265
290
  }
266
- // Execute
291
+ // Execute. If the orchestrator throws (per-task errors are handled
292
+ // internally and don't escape), stamp the failure on the plan first.
267
293
  const orchestrator = new ReviewOrchestratorService(config, projectRoot);
268
- await executeAndPrint(orchestrator, plan.plan_id);
294
+ try {
295
+ await executeAndPrint(orchestrator, plan.plan_id);
296
+ }
297
+ catch (err) {
298
+ storage.setFailure(plan.plan_id, {
299
+ step: "reviewing",
300
+ review_id: null,
301
+ message: err instanceof Error ? err.message : String(err),
302
+ });
303
+ throw err;
304
+ }
269
305
  // Render results
270
306
  const finalPlan = storage.getPlan(plan.plan_id);
271
307
  const results = storage.getResults(plan.plan_id);
272
308
  console.log(renderOutput(results, finalPlan, options.format));
273
309
  process.exit(checkFailOn(results, options.failOn));
274
310
  }
275
- /** Default command — natural language deskcheck via LLM planner. */
311
+ /**
312
+ * Parse the `--scope` flag value into a structured Scope.
313
+ *
314
+ * Accepted forms:
315
+ * all → { type: "all" }
316
+ * changes → { type: "changes", ref: "HEAD" }
317
+ * changes:<ref> → { type: "changes", ref: "<ref>" }
318
+ */
319
+ function parseScopeFlag(value) {
320
+ const trimmed = value.trim();
321
+ if (trimmed === "all")
322
+ return { type: "all" };
323
+ if (trimmed === "changes")
324
+ return { type: "changes", ref: "HEAD" };
325
+ if (trimmed.startsWith("changes:")) {
326
+ const ref = trimmed.slice("changes:".length).trim();
327
+ if (!ref)
328
+ throw new Error(`--scope changes: requires a ref (e.g. changes:main)`);
329
+ return { type: "changes", ref };
330
+ }
331
+ throw new Error(`Invalid --scope value: "${value}". Expected "all", "changes", or "changes:<ref>".`);
332
+ }
333
+ /** Default command — natural-language deskcheck via the input resolver agent. */
276
334
  async function deskchecCommand(prompt, options) {
277
335
  const projectRoot = resolveProjectRoot();
278
336
  const config = loadConfig(projectRoot);
279
337
  const storageDir = path.join(projectRoot, config.storage_dir);
280
- console.log(`${DIM}Planning...${RESET}`);
338
+ const storage = new ReviewStorageService(storageDir);
339
+ const scopeOverride = options.scope ? parseScopeFlag(options.scope) : undefined;
281
340
  const criteriaFilter = options.criteria
282
341
  ? options.criteria.split(",").map((s) => s.trim()).filter((s) => s.length > 0)
283
342
  : undefined;
284
- const planner = new ReviewPlannerService(config, projectRoot);
285
- const plan = await planner.plan(prompt, criteriaFilter);
343
+ // Step 1: resolve { scope, files } from natural language.
344
+ console.log(`${DIM}Resolving...${RESET}`);
345
+ const resolver = new ReviewInputResolverService(config, projectRoot);
346
+ const { scope, files } = await resolver.resolve(prompt, scopeOverride);
347
+ // Step 2: discover and filter criteria (programmatic, no LLM).
348
+ const modulesDir = path.resolve(projectRoot, config.modules_dir);
349
+ let modules = discoverModules(modulesDir);
350
+ if (criteriaFilter) {
351
+ modules = filterModules(modules, criteriaFilter);
352
+ }
353
+ const invocation = captureInvocation(projectRoot);
354
+ // Empty file list → empty plan with a friendly message, exit clean.
355
+ if (files.length === 0) {
356
+ const emptyPlan = storage.createPlan(prompt, scope, invocation);
357
+ storage.setMatchedFiles(emptyPlan.plan_id, [], []);
358
+ storage.finalizePlan(emptyPlan.plan_id);
359
+ console.log("");
360
+ console.log(`${DIM} No files matched the request. Nothing to review.${RESET}`);
361
+ console.log(`${DIM} Plan ID: ${emptyPlan.plan_id}${RESET}`);
362
+ process.exit(0);
363
+ }
364
+ // Step 3: build the plan (glob match → partition → tasks).
365
+ const partitioner = new ReviewPartitionerService(config, projectRoot);
366
+ const plan = await buildPlanWithTasks(storage, partitioner, prompt, scope, invocation, files, modules, {
367
+ onMatchingComplete: (criteriaCount, fileCount) => {
368
+ console.log(`${DIM} Matching: ${criteriaCount} criteria matched ${fileCount} file(s)${RESET}`);
369
+ if (criteriaCount > 0) {
370
+ console.log(`${DIM} Partitioning...${RESET}`);
371
+ }
372
+ },
373
+ onPartitionCompleted: (decision) => {
374
+ const name = decision.review_id.split("/").pop() ?? decision.review_id;
375
+ console.log(`${DIM} ${name}: ${decision.subtasks.length} subtask(s) from ${decision.matched_files.length} file(s)${RESET}`);
376
+ },
377
+ });
286
378
  printPlanSummary(plan);
287
- // Execute
379
+ if (Object.keys(plan.tasks).length === 0) {
380
+ console.log(`${DIM} No criteria matched the resolved files.${RESET}`);
381
+ process.exit(0);
382
+ }
383
+ // Step 4: execute reviewers. If the orchestrator throws, mark the plan
384
+ // as failed at the reviewing step before re-raising.
288
385
  const orchestrator = new ReviewOrchestratorService(config, projectRoot);
289
- await executeAndPrint(orchestrator, plan.plan_id);
290
- // Render results
291
- const storage = new ReviewStorageService(storageDir);
386
+ try {
387
+ await executeAndPrint(orchestrator, plan.plan_id);
388
+ }
389
+ catch (err) {
390
+ storage.setFailure(plan.plan_id, {
391
+ step: "reviewing",
392
+ review_id: null,
393
+ message: err instanceof Error ? err.message : String(err),
394
+ });
395
+ throw err;
396
+ }
397
+ // Step 5: render.
292
398
  const finalPlan = storage.getPlan(plan.plan_id);
293
399
  const results = storage.getResults(plan.plan_id);
294
400
  console.log(renderTerminal(results, finalPlan));
@@ -405,12 +511,13 @@ const program = new Command();
405
511
  program
406
512
  .name("deskcheck")
407
513
  .description("Modular code deskcheck tool powered by Claude")
408
- .version("0.1.0");
514
+ .version("0.4.0");
409
515
  // Default command: natural language deskcheck
410
516
  program
411
517
  .argument("[prompt]", "What to check (natural language)")
412
518
  .option("--fail-on <severities>", "Exit non-zero if findings match: critical, warning, info (comma-separated)")
413
519
  .option("--criteria <names>", "Only run specific criteria (comma-separated, e.g. dto-enforcement,controller-conventions)")
520
+ .option("--scope <value>", "Override resolver scope inference: 'all', 'changes', or 'changes:<ref>'")
414
521
  .action(async (prompt, options) => {
415
522
  if (!prompt) {
416
523
  program.help();
@@ -436,10 +543,9 @@ program
436
543
  .option("--criteria <names>", "Only run specific criteria (comma-separated, e.g. dto-enforcement,controller-conventions)")
437
544
  .addHelpText("after", `
438
545
  Examples:
546
+ deskcheck diff Check working tree vs HEAD (staged + unstaged)
439
547
  deskcheck diff develop Check changes vs develop branch
440
- deskcheck diff --staged Check staged changes
441
548
  deskcheck diff HEAD~3 Check last 3 commits
442
- deskcheck diff main -- app/ Check changes in app/ vs main
443
549
  deskcheck diff develop --dry-run Show plan without executing
444
550
  deskcheck diff develop --fail-on=critical Exit non-zero on critical findings
445
551
  deskcheck diff develop --criteria=dto-enforcement Only run one criterion