@mechanai/deepreview 2.14.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,9 +58,9 @@ Write your validated plan to the output path provided. Use this structure:
58
58
 
59
59
  ## Fix Plan
60
60
 
61
- ### Fix [N]: [Issue Title]
61
+ ### Fix [N]: [Issue Title] (full format — critical/warning)
62
62
  **File(s):** path/to/file:line
63
- **Priority:** critical | warning | suggestion
63
+ **Priority:** critical | warning
64
64
  **Validation:** approved | revised | rejected
65
65
  **Validation notes:** [1-2 sentences: what was checked, what was found]
66
66
  **Approach:** [original or revised approach]
@@ -68,6 +68,14 @@ Write your validated plan to the output path provided. Use this structure:
68
68
  [Original code if approved, corrected code if revised, "[rejected — see validation notes]" if rejected]
69
69
  **Verification:** [from original plan]
70
70
 
71
+ ### Fix [N]: [Issue Title] (compact format — suggestion)
72
+ **File(s):** path/to/file:line
73
+ **Priority:** suggestion
74
+ **Validation:** approved | revised | rejected
75
+ **Validation notes:** [1-2 sentences: what was checked, what was found]
76
+ **Change:** [Original or corrected instruction; "[rejected — see validation notes]" if rejected]
77
+ **Verification:** [from original plan, if present]
78
+
71
79
  ## Order of Operations
72
80
  [Revised if any fixes were rejected or reordering is needed]
73
81
 
@@ -25,19 +25,22 @@ You will receive a path to a synthesis file. Read it.
25
25
 
26
26
  ## Documentation Drift handling
27
27
 
28
- If the synthesis contains a "Documentation Drift" section with a batched checklist, consolidate all those items into a **single** fix entry in the plan. Do not create separate fix entries for each documentation item. Use this format:
28
+ If the synthesis contains a "Documentation Drift" section with a batched checklist, consolidate all those items into a **single** fix entry in the plan using compact format. Do not create separate fix entries for each documentation item. Use this format:
29
29
 
30
30
  ```
31
31
  ### Fix [N]: Documentation Updates
32
+
32
33
  **File(s):** [list all affected files]
33
34
  **Priority:** suggestion
34
- **Approach:** Batch update stale/verbose documentation
35
- **Code changes:**
36
- [Group changes by file. For each file, show the exact text replacement.]
35
+ **Change:**
36
+
37
+ - path/to/file1:42 — Replace "old text" with "new text"
38
+ - path/to/file2:18 — Delete stale comment
39
+ - path/to/file3:55 — Replace "old description" with "new description"
37
40
  **Verification:** Confirm updated docs match current code behavior
38
41
  ```
39
42
 
40
- Critical documentation findings (which appear individually in the "Critical Issues" section, not in "Documentation Drift") should still get their own fix entries.
43
+ Critical documentation findings (which appear individually in the "Critical Issues" section, not in "Documentation Drift") should still get their own fix entries using full format.
41
44
 
42
45
  ## Quality rules
43
46
 
@@ -45,6 +48,15 @@ Critical documentation findings (which appear individually in the "Critical Issu
45
48
  - **Stay within scope.** Only fix what the synthesis identifies. Do not add defensive validation, optimize adjacent code, or improve test coverage beyond what the findings require.
46
49
  - **Concrete, not aspirational.** Every code change must be copy-pasteable. No pseudocode, no "something like this", no TODOs.
47
50
 
51
+ ## Format selection
52
+
53
+ Select the output format for each fix based on its priority:
54
+
55
+ - **Full format** for `critical` and `warning` fixes — include Approach and Code change blocks
56
+ - **Compact format** for `suggestion` fixes — use a one-line Change instruction instead (e.g., "Replace X with Y", "Delete line N", "Add X after line N")
57
+
58
+ For batched documentation fixes, the Change field uses a bullet list (one instruction per location).
59
+
48
60
  ## Output format
49
61
 
50
62
  Write your implementation plan to the output path provided. Use this structure:
@@ -53,22 +65,33 @@ Write your implementation plan to the output path provided. Use this structure:
53
65
  # Implementation Plan — [PR/branch] — [date]
54
66
 
55
67
  ## Summary
68
+
56
69
  [What needs to be fixed and the estimated scope of work]
57
70
 
58
71
  ## Fix Plan
59
72
 
60
- ### Fix [N]: [Issue Title]
73
+ ### Fix [N]: [Issue Title] (full format — critical/warning)
74
+
61
75
  **File(s):** path/to/file:line
62
- **Priority:** critical | warning | suggestion
76
+ **Priority:** critical | warning
63
77
  **Approach:** [what to change and why — 1-2 sentences]
64
78
  **Code change:**
65
79
  [Exact code to replace the problematic code. Use actual variable names, actual logic. Not pseudocode.]
66
80
  **Verification:** [what to check after the fix — 1 sentence]
67
81
 
82
+ ### Fix [N]: [Issue Title] (compact format — suggestion)
83
+
84
+ **File(s):** path/to/file:line
85
+ **Priority:** suggestion
86
+ **Change:** [One-line instruction: "Replace X with Y" or "Delete lines N-M" or "Add X after line N"]
87
+ **Verification:** [optional — omit if obvious]
88
+
68
89
  ## Order of Operations
90
+
69
91
  [If fixes depend on each other, specify the order. Otherwise: "Fixes are independent — apply in any order."]
70
92
 
71
93
  ## Risk
94
+
72
95
  [Any fixes that could introduce new issues and what to watch for]
73
96
  ```
74
97
 
@@ -18,6 +18,7 @@ Set ITERATION=1
18
18
  Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
19
19
  Set CONSECUTIVE_ZERO_NEW=0 (tracks consecutive iterations with 0 new findings for deadlock detection)
20
20
  Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
21
+ Set EXPIRED_ENTRIES=[] (calibration entries that have expired — collected at start, used at end)
21
22
 
22
23
  Determine REPO_ROOT — the main repository root (not a worktree root). Run:
23
24
  `REPO_ROOT=$(realpath "$(git rev-parse --git-common-dir)" | sed 's|/\.git$||')`
@@ -30,6 +31,12 @@ Extract PROJECT_CONTEXT by detecting project metadata (version, deployment model
30
31
  - Format as a calibration preamble with version info, deployment model, and guidance for severity adjustment
31
32
  - If metadata extraction fails or no version info is found, set PROJECT_CONTEXT="" (empty string)
32
33
 
34
+ Load learned calibration by calling the `deepreview-calibration-load` tool (no arguments needed — it uses the working directory). Parse the JSON response:
35
+
36
+ - If `preamble` is non-empty, append it to PROJECT_CONTEXT (after the static calibration guidelines)
37
+ - If `expired` is non-empty, store it in EXPIRED_ENTRIES for end-of-session prompting
38
+ - If the tool fails or returns empty, proceed without calibration (do not STOP)
39
+
33
40
  Build PRIOR_CONTEXT:
34
41
 
35
42
  - Start with PROJECT_CONTEXT (if non-empty)
@@ -62,7 +69,7 @@ NOVELTY MODE (iter2+ only):
62
69
  A) CONVERGENCE EXIT: If `0 new AND 0 regression`:
63
70
 
64
71
  - Tell the user: "deepreview-loop converged after $ITERATION iteration(s). No new findings detected."
65
- - STOP.
72
+ - Go to STEP 6 (calibration proposal), then STOP.
66
73
 
67
74
  B) DEADLOCK (synthesizer signal): If `0 new AND N recurring (N > 0) AND 0 regression` for 2 consecutive iterations:
68
75
 
@@ -95,7 +102,7 @@ Compare this iteration's findings (file:line + issue title) against the previous
95
102
  If the synthesis/review has 0 critical AND 0 warning AND 0 suggestion findings:
96
103
 
97
104
  - Tell the user: "deepreview-loop complete after $ITERATION iteration(s). No findings remain."
98
- - STOP.
105
+ - Go to STEP 6 (calibration proposal), then STOP.
99
106
 
100
107
  STEP 4: APPLY ALL FIXES
101
108
  Dispatch the applier automatically — do NOT ask the user for permission.
@@ -130,7 +137,7 @@ If ITERATION > 5:
130
137
  - Tell the user: "deepreview-loop hit iteration limit (5). Remaining findings may require manual intervention or a design decision."
131
138
  - Show the latest stats.
132
139
  - Ask the user: "Continue for more iterations, or stop here?"
133
- - If user says stop → STOP.
140
+ - If user says stop → Go to STEP 6 (calibration proposal), then STOP.
134
141
  - If user says continue → reset limit to ITERATION + 5 and proceed.
135
142
 
136
143
  Create new session directory: SESSION_DIR="$REPO_ROOT/.ai/deepreview/loop-iter$ITERATION-$(date +%Y-%m-%d-%H%M%S)"
@@ -309,10 +316,48 @@ If this task fails, emit a warning: "Plan validation failed — applying unvalid
309
316
 
310
317
  Go to STEP 3.
311
318
 
319
+ STEP 6: PROPOSE CALIBRATION UPDATES (runs after any exit — clean exit, convergence, deadlock, or iteration limit)
320
+
321
+ Skip this step if ITERATION == 1 AND the exit was a clean exit (0 findings on first pass means nothing to calibrate).
322
+
323
+ Compare reviewer severity to synthesized severity from the LAST completed iteration:
324
+
325
+ 1. Read the reviewer files ($SESSION_DIR/review-\*.md) from the last iteration
326
+ 2. Read the synthesis file ($SESSION_DIR/synthesis.md) from the last iteration
327
+ 3. For each finding in the synthesis, identify if ANY reviewer originally flagged it at a HIGHER severity
328
+ 4. Only consider DOWNGRADES (reviewer said "critical" or "warning", synthesis says lower)
329
+
330
+ For each detected downgrade pattern:
331
+
332
+ - Check if it matches an existing calibration entry (from EXPIRED_ENTRIES or active entries loaded at start)
333
+ - If yes: increment observedCount, set lastConfirmed to today
334
+ - If new: create a proposed entry with observedCount=1
335
+
336
+ If there are proposed updates (new or incremented entries) OR expired entries to remove:
337
+
338
+ - Call `deepreview-calibration-load` to get current entries
339
+ - Present to user:
340
+
341
+ ```
342
+ Calibration update proposed:
343
+
344
+ - NEW: "[pattern]" in [context]: [originalSeverity] → [adjustedSeverity]
345
+ - UPDATED: "[pattern]" in [context]: observed N→N+1, re-confirmed
346
+ - EXPIRED: "[pattern]" (last confirmed N days ago) — will be removed
347
+
348
+ Accept these changes? [y/n/edit]
349
+ ```
350
+
351
+ - If user approves: merge proposed entries with existing active entries (removing expired ones), then call `deepreview-calibration-save` with the merged entries
352
+ - If user says "edit": let them modify the proposal, then save
353
+ - If user rejects: skip saving
354
+
355
+ If no adjustments were detected and no entries expired, skip this step silently.
356
+
312
357
  IMPORTANT RULES:
313
358
 
314
- - Do NOT read any review/synthesis/plan files yourself. Ever.
315
- - Use ONLY the file paths and stats/summary lines returned by subagents.
359
+ - Do NOT read any review/synthesis/plan files yourself during STEPS 1-5. Exception: STEP 6 (calibration proposal) requires reading reviewer and synthesis files to detect severity adjustments.
360
+ - Use ONLY the file paths and stats/summary lines returned by subagents (during STEPS 1-5).
316
361
  - Apply ALL findings (critical, warning, AND suggestion) — the goal is a clean review.
317
362
  - Do NOT ask the user for permission to apply fixes. Apply automatically.
318
363
  - DO ask the user if: iteration limit is hit, deadlock is detected, verification fails, or diff size diverges.
@@ -14,6 +14,7 @@ STEP 1: DETERMINE INPUT
14
14
  - Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
15
15
  - Set CONSECUTIVE_ZERO_NEW=0 (tracks consecutive iterations with 0 new findings for deadlock detection)
16
16
  - Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
17
+ - Set EXPIRED_ENTRIES=[] (calibration entries that have expired — collected at start, used at end)
17
18
  - Determine REPO_ROOT — the main repository root (not a worktree root). Run:
18
19
  `REPO_ROOT=$(realpath "$(git rev-parse --git-common-dir)" | sed 's|/\.git$||')`
19
20
 
@@ -25,6 +26,12 @@ Extract PROJECT_CONTEXT by detecting project metadata (version, deployment model
25
26
  - Format as a calibration preamble with version info, deployment model, and guidance for severity adjustment
26
27
  - If metadata extraction fails or no version info is found, set PROJECT_CONTEXT="" (empty string)
27
28
 
29
+ Load learned calibration by calling the `deepreview-calibration-load` tool (no arguments needed — it uses the working directory). Parse the JSON response:
30
+
31
+ - If `preamble` is non-empty, append it to PROJECT_CONTEXT (after the static calibration guidelines)
32
+ - If `expired` is non-empty, store it in EXPIRED_ENTRIES for end-of-session prompting
33
+ - If the tool fails or returns empty, proceed without calibration (do not STOP)
34
+
28
35
  Build PRIOR_CONTEXT:
29
36
 
30
37
  - Start with PROJECT_CONTEXT (if non-empty)
@@ -56,12 +63,12 @@ NOVELTY MODE (iter2+ only):
56
63
  A) CLEAN EXIT: If 0 critical AND 0 warning AND 0 suggestion:
57
64
 
58
65
  - Tell the user: "deepreview-spec-loop complete after $ITERATION iteration(s). No findings remain."
59
- - STOP.
66
+ - Go to STEP 6 (calibration proposal), then STOP.
60
67
 
61
68
  B) CONVERGENCE EXIT: If `0 new AND 0 regression`:
62
69
 
63
70
  - Tell the user: "deepreview-spec-loop converged after $ITERATION iteration(s). No new findings detected. Remaining recurring findings (if any) reflect reviewer opinion differences."
64
- - STOP.
71
+ - Go to STEP 6 (calibration proposal), then STOP.
65
72
 
66
73
  C) DEADLOCK EXIT: If `0 new AND N recurring (N > 0) AND 0 regression` for 2 consecutive iterations:
67
74
 
@@ -94,13 +101,13 @@ Track the total finding count (critical + warning + suggestion) for each iterati
94
101
  A) CLEAN EXIT: If 0 critical AND 0 warning AND 0 suggestion:
95
102
 
96
103
  - Tell the user: "deepreview-spec-loop complete after $ITERATION iteration(s). No findings remain."
97
- - STOP.
104
+ - Go to STEP 6 (calibration proposal), then STOP.
98
105
 
99
106
  B) PLATEAU EXIT: If ITERATION >= 3 and the total has not decreased compared to the minimum of any previous iteration for 2 consecutive iterations:
100
107
 
101
108
  - Tell the user: "deepreview-spec-loop plateau after $ITERATION iteration(s). Findings are oscillating (history: [list totals]) and not converging."
102
109
  - Show the latest stats breakdown.
103
- - STOP.
110
+ - Go to STEP 6 (calibration proposal), then STOP.
104
111
 
105
112
  STEP 4: APPLY ALL FIXES
106
113
  Dispatch the applier automatically — do NOT ask the user for permission.
@@ -118,7 +125,7 @@ If ITERATION > 7:
118
125
 
119
126
  - Tell the user: "deepreview-spec-loop hit iteration limit (7). This should not normally happen — convergence or deadlock detection should have stopped earlier."
120
127
  - Show the latest stats.
121
- - STOP.
128
+ - Go to STEP 6 (calibration proposal), then STOP.
122
129
 
123
130
  Create new session directory: SESSION_DIR="$REPO_ROOT/.ai/deepreview/spec-loop-iter$ITERATION-$(date +%Y-%m-%d-%H%M%S)"
124
131
  Run `mkdir -p $SESSION_DIR`
@@ -250,10 +257,48 @@ If this task fails, emit a warning: "Plan validation failed — applying unvalid
250
257
 
251
258
  Go to STEP 3.
252
259
 
260
+ STEP 6: PROPOSE CALIBRATION UPDATES (runs after any exit — clean exit, convergence, plateau, or iteration limit)
261
+
262
+ Skip this step if ITERATION == 1 AND the exit was a clean exit (0 findings on first pass means nothing to calibrate).
263
+
264
+ Compare reviewer severity to synthesized severity from the LAST completed iteration:
265
+
266
+ 1. Read the reviewer files ($SESSION_DIR/review-\*.md) from the last iteration
267
+ 2. Read the synthesis file ($SESSION_DIR/synthesis.md) from the last iteration
268
+ 3. For each finding in the synthesis, identify if ANY reviewer originally flagged it at a HIGHER severity
269
+ 4. Only consider DOWNGRADES (reviewer said "critical" or "warning", synthesis says lower)
270
+
271
+ For each detected downgrade pattern:
272
+
273
+ - Check if it matches an existing calibration entry (from EXPIRED_ENTRIES or active entries loaded at start)
274
+ - If yes: increment observedCount, set lastConfirmed to today
275
+ - If new: create a proposed entry with observedCount=1
276
+
277
+ If there are proposed updates (new or incremented entries) OR expired entries to remove:
278
+
279
+ - Call `deepreview-calibration-load` to get current entries
280
+ - Present to user:
281
+
282
+ ```
283
+ Calibration update proposed:
284
+
285
+ - NEW: "[pattern]" in [context]: [originalSeverity] → [adjustedSeverity]
286
+ - UPDATED: "[pattern]" in [context]: observed N→N+1, re-confirmed
287
+ - EXPIRED: "[pattern]" (last confirmed N days ago) — will be removed
288
+
289
+ Accept these changes? [y/n/edit]
290
+ ```
291
+
292
+ - If user approves: merge proposed entries with existing active entries (removing expired ones), then call `deepreview-calibration-save` with the merged entries
293
+ - If user says "edit": let them modify the proposal, then save
294
+ - If user rejects: skip saving
295
+
296
+ If no adjustments were detected and no entries expired, skip this step silently.
297
+
253
298
  IMPORTANT RULES:
254
299
 
255
- - Do NOT read any review/synthesis/plan files yourself. Ever.
256
- - Use ONLY the file paths and stats/summary lines returned by subagents.
300
+ - Do NOT read any review/synthesis/plan files yourself during STEPS 1-5. Exception: STEP 6 (calibration proposal) requires reading reviewer and synthesis files to detect severity adjustments.
301
+ - Use ONLY the file paths and stats/summary lines returned by subagents (during STEPS 1-5).
257
302
  - Apply ALL findings (critical, warning, AND suggestion) — the goal is a clean review.
258
303
  - Do NOT ask the user for permission to apply fixes. Apply automatically.
259
304
  - DO ask the user if iteration limit is hit, divergence is detected, or deadlock is detected.
@@ -1,6 +1,33 @@
1
1
  import { type Plugin, type PluginInput, tool } from "@opencode-ai/plugin";
2
+ import { execSync } from "node:child_process";
3
+ import { resolve } from "node:path";
2
4
  import { postReview } from "../../src/post-review.ts";
3
5
  import { buildPriorReview } from "../../src/build-prior-review.ts";
6
+ import {
7
+ type CalibrationEntry,
8
+ type CalibrationSettings,
9
+ loadCalibration,
10
+ formatCalibrationPreamble,
11
+ writeCalibration,
12
+ } from "../../src/calibration.ts";
13
+
14
+ /**
15
+ * Resolve the main repository root (not a worktree root) from a working directory.
16
+ * Falls back to the given directory if git resolution fails.
17
+ */
18
+ function resolveRepoRoot(cwd: string): string {
19
+ try {
20
+ const gitCommonDir = execSync("git rev-parse --git-common-dir", {
21
+ cwd,
22
+ encoding: "utf-8",
23
+ }).trim();
24
+ // git-common-dir returns the path to .git (or the shared .git dir for worktrees).
25
+ // It may be relative, so resolve against cwd, then strip trailing /.git for the repo root.
26
+ return resolve(cwd, gitCommonDir).replace(/\/\.git$/u, "");
27
+ } catch {
28
+ return cwd;
29
+ }
30
+ }
4
31
 
5
32
  // oxlint-disable-next-line require-await, max-lines-per-function -- Why: Plugin type signature requires async but this plugin has no async initialization; function is long due to tool registrations with schema definitions
6
33
  export const server: Plugin = async (_input: PluginInput) => {
@@ -69,6 +96,41 @@ export const server: Plugin = async (_input: PluginInput) => {
69
96
  }
70
97
  },
71
98
  }),
99
+ "deepreview-calibration-load": tool({
100
+ description:
101
+ "Load per-project calibration entries (learned severity adjustments from prior " +
102
+ "review sessions). Returns active entries, expired entries needing re-confirmation, " +
103
+ "and a formatted preamble for reviewer injection.",
104
+ args: {},
105
+ async execute(_args, context) {
106
+ const repoRoot = resolveRepoRoot(context.directory);
107
+ const { active, expired } = loadCalibration(repoRoot);
108
+ const preamble = formatCalibrationPreamble(active);
109
+ return JSON.stringify({ active, expired, preamble });
110
+ },
111
+ }),
112
+ "deepreview-calibration-save": tool({
113
+ description:
114
+ "Save calibration entries to .ai/deepreview/calibration.yml (local, unversioned). " +
115
+ "Always writes to local — never modifies .deepreview.yml.",
116
+ args: {
117
+ entries: tool.schema.string().describe("JSON array of CalibrationEntry objects to save"),
118
+ expiry_days: tool.schema
119
+ .number()
120
+ .int()
121
+ .positive()
122
+ .optional()
123
+ .describe("Expiry window in days (default: 30)"),
124
+ },
125
+ async execute(args, context) {
126
+ const repoRoot = resolveRepoRoot(context.directory);
127
+ // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: JSON.parse returns any; schema is validated by the caller (orchestrator)
128
+ const entries = JSON.parse(args.entries) as CalibrationEntry[];
129
+ const settings: CalibrationSettings = { expiryDays: args.expiry_days ?? 30 };
130
+ writeCalibration(repoRoot, { version: 1, settings, entries });
131
+ return JSON.stringify({ written: `${repoRoot}/.ai/deepreview/calibration.yml` });
132
+ },
133
+ }),
72
134
  },
73
135
  };
74
136
  };
package/README.md CHANGED
@@ -83,6 +83,65 @@ graph LR
83
83
  Stages communicate via files on disk — the orchestrator never reads review content into
84
84
  its own context, keeping token usage minimal.
85
85
 
86
+ ## Calibration
87
+
88
+ deepreview learns from validator severity adjustments over time. When validators
89
+ consistently downgrade the same category of finding (e.g., "missing auth" in a
90
+ localhost-only tool), the system proposes calibration entries at the end of each
91
+ review session.
92
+
93
+ ### How it works
94
+
95
+ 1. **Session end:** The orchestrator compares reviewer severity to synthesized
96
+ (post-validation) severity
97
+ 2. **Proposal:** Systematic downgrades are proposed as calibration entries
98
+ 3. **User confirms:** You approve, edit, or reject the proposed changes
99
+ 4. **Next session:** Approved calibration is injected into reviewer prompts,
100
+ reducing severity inflation
101
+
102
+ ### Configuration
103
+
104
+ Local calibration (personal, gitignored):
105
+
106
+ ```yaml
107
+ # .ai/deepreview/calibration.yml
108
+ version: 1
109
+ settings:
110
+ expiryDays: 30 # days before unconfirmed entries expire
111
+ entries:
112
+ - id: "cal-001"
113
+ pattern: "missing authentication"
114
+ context: "localhost-only server"
115
+ originalSeverity: "warning"
116
+ adjustedSeverity: "suggestion"
117
+ observedCount: 4
118
+ lastConfirmed: "2026-06-28"
119
+ createdAt: "2026-06-01"
120
+ ```
121
+
122
+ ### Sharing calibration with your team
123
+
124
+ To share calibration entries, add them to `.deepreview.yml` under the `calibration:` key:
125
+
126
+ ```yaml
127
+ # .deepreview.yml
128
+ threatModel: localhost-only
129
+ calibration:
130
+ settings:
131
+ expiryDays: 60
132
+ entries:
133
+ - id: "shared-001"
134
+ pattern: "missing authentication"
135
+ context: "localhost-only server"
136
+ originalSeverity: "warning"
137
+ adjustedSeverity: "suggestion"
138
+ observedCount: 4
139
+ lastConfirmed: "2026-06-28"
140
+ createdAt: "2026-06-01"
141
+ ```
142
+
143
+ Local entries override shared entries when both match the same `pattern` + `context`.
144
+
86
145
  ### Review agents
87
146
 
88
147
  | Agent | Code review | Spec review |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mechanai/deepreview",
3
- "version": "2.14.0",
3
+ "version": "2.16.0",
4
4
  "description": "Multi-agent parallel code/spec review for OpenCode",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -0,0 +1,249 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from "bun:test";
2
+ import {
3
+ loadCalibration,
4
+ writeCalibration,
5
+ nextId,
6
+ formatCalibrationPreamble,
7
+ type CalibrationEntry,
8
+ } from "./calibration";
9
+ import { mkdirSync, writeFileSync, rmSync, existsSync } from "node:fs";
10
+ import path from "node:path";
11
+
12
+ const TEST_ROOT = path.join(import.meta.dirname, "..", "tmp-calibration-test");
13
+
14
+ beforeEach(() => {
15
+ mkdirSync(path.join(TEST_ROOT, ".ai", "deepreview"), { recursive: true });
16
+ });
17
+
18
+ afterEach(() => {
19
+ rmSync(TEST_ROOT, { recursive: true, force: true });
20
+ });
21
+
22
+ // oxlint-disable-next-line max-lines-per-function -- Why: comprehensive test coverage for loadCalibration requires inline YAML fixtures to keep each case self-contained; extracting fixtures would obscure the scenario being tested
23
+ describe("calibration: loadCalibration", () => {
24
+ it("returns empty when no calibration file exists", () => {
25
+ const emptyRoot = path.join(TEST_ROOT, "no-files");
26
+ mkdirSync(emptyRoot, { recursive: true });
27
+ const result = loadCalibration(emptyRoot);
28
+ expect(result.active).toEqual([]);
29
+ expect(result.expired).toEqual([]);
30
+ });
31
+
32
+ it("loads entries from local calibration file", () => {
33
+ const yaml = `
34
+ version: 1
35
+ settings:
36
+ expiryDays: 30
37
+ entries:
38
+ - id: "cal-001"
39
+ pattern: "missing auth"
40
+ context: "localhost-only server"
41
+ originalSeverity: "warning"
42
+ adjustedSeverity: "suggestion"
43
+ observedCount: 3
44
+ lastConfirmed: "${new Date().toISOString().split("T")[0]}"
45
+ createdAt: "2026-01-01"
46
+ `;
47
+ writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), yaml);
48
+ const result = loadCalibration(TEST_ROOT);
49
+ expect(result.active).toHaveLength(1);
50
+ expect(result.active[0].id).toBe("cal-001");
51
+ expect(result.expired).toHaveLength(0);
52
+ });
53
+
54
+ it("filters expired entries based on expiryDays", () => {
55
+ const oldDate = "2020-01-01";
56
+ const yaml = `
57
+ version: 1
58
+ settings:
59
+ expiryDays: 30
60
+ entries:
61
+ - id: "cal-001"
62
+ pattern: "old pattern"
63
+ context: "stale context"
64
+ originalSeverity: "warning"
65
+ adjustedSeverity: "suggestion"
66
+ observedCount: 2
67
+ lastConfirmed: "${oldDate}"
68
+ createdAt: "2020-01-01"
69
+ `;
70
+ writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), yaml);
71
+ const result = loadCalibration(TEST_ROOT);
72
+ expect(result.active).toHaveLength(0);
73
+ expect(result.expired).toHaveLength(1);
74
+ expect(result.expired[0].id).toBe("cal-001");
75
+ });
76
+
77
+ it("merges shared and local entries (local wins on conflict)", () => {
78
+ const today = new Date().toISOString().split("T")[0];
79
+ const sharedYaml = `
80
+ threatModel: localhost-only
81
+ calibration:
82
+ settings:
83
+ expiryDays: 60
84
+ entries:
85
+ - id: "shared-001"
86
+ pattern: "missing auth"
87
+ context: "localhost-only server"
88
+ originalSeverity: "warning"
89
+ adjustedSeverity: "suggestion"
90
+ observedCount: 2
91
+ lastConfirmed: "${today}"
92
+ createdAt: "2026-01-01"
93
+ - id: "shared-002"
94
+ pattern: "stale docs"
95
+ context: "pre-1.0 project"
96
+ originalSeverity: "critical"
97
+ adjustedSeverity: "suggestion"
98
+ observedCount: 5
99
+ lastConfirmed: "${today}"
100
+ createdAt: "2026-01-01"
101
+ `;
102
+ const localYaml = `
103
+ version: 1
104
+ settings:
105
+ expiryDays: 30
106
+ entries:
107
+ - id: "cal-001"
108
+ pattern: "missing auth"
109
+ context: "localhost-only server"
110
+ originalSeverity: "warning"
111
+ adjustedSeverity: "suggestion"
112
+ observedCount: 5
113
+ lastConfirmed: "${today}"
114
+ createdAt: "2026-02-01"
115
+ `;
116
+ writeFileSync(path.join(TEST_ROOT, ".deepreview.yml"), sharedYaml);
117
+ writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), localYaml);
118
+
119
+ const result = loadCalibration(TEST_ROOT);
120
+ // "missing auth" + "localhost-only server" conflict: local wins (observedCount=5)
121
+ const authEntry = result.active.find((e) => e.pattern === "missing auth");
122
+ expect(authEntry?.observedCount).toBe(5);
123
+ expect(authEntry?.id).toBe("cal-001");
124
+ // "stale docs" comes from shared only
125
+ const docsEntry = result.active.find((e) => e.pattern === "stale docs");
126
+ expect(docsEntry?.id).toBe("shared-002");
127
+ });
128
+
129
+ it("handles malformed YAML gracefully", () => {
130
+ writeFileSync(
131
+ path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"),
132
+ "not: [valid: yaml: {{{",
133
+ );
134
+ const result = loadCalibration(TEST_ROOT);
135
+ expect(result.active).toEqual([]);
136
+ expect(result.expired).toEqual([]);
137
+ });
138
+
139
+ it("uses shared expiryDays when local has no settings", () => {
140
+ // Compute a date 60 days ago — with 90-day expiry this should still be active
141
+ const d = new Date();
142
+ d.setDate(d.getDate() - 60);
143
+ const oldDate = d.toISOString().split("T")[0];
144
+ const sharedYaml = `
145
+ threatModel: localhost-only
146
+ calibration:
147
+ settings:
148
+ expiryDays: 90
149
+ entries:
150
+ - id: "shared-001"
151
+ pattern: "some pattern"
152
+ context: "some context"
153
+ originalSeverity: "warning"
154
+ adjustedSeverity: "suggestion"
155
+ observedCount: 2
156
+ lastConfirmed: "${oldDate}"
157
+ createdAt: "2026-01-01"
158
+ `;
159
+ const localYaml = `
160
+ version: 1
161
+ entries: []
162
+ `;
163
+ writeFileSync(path.join(TEST_ROOT, ".deepreview.yml"), sharedYaml);
164
+ writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), localYaml);
165
+
166
+ const result = loadCalibration(TEST_ROOT);
167
+ // With 90-day expiry, an entry from ~60 days ago should still be active
168
+ expect(result.active).toHaveLength(1);
169
+ });
170
+ });
171
+
172
+ describe("calibration: nextId", () => {
173
+ it("returns cal-001 for empty list", () => {
174
+ expect(nextId([])).toBe("cal-001");
175
+ });
176
+
177
+ it("increments from highest existing ID", () => {
178
+ // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: test stubs only need id field; full CalibrationEntry would be verbose for this nextId unit test
179
+ const entries = [{ id: "cal-003" } as any, { id: "cal-001" } as any];
180
+ // oxlint-disable-next-line typescript/no-unsafe-argument -- Why: stub array; safe for nextId which only reads .id
181
+ expect(nextId(entries)).toBe("cal-004");
182
+ });
183
+
184
+ it("handles non-numeric IDs gracefully", () => {
185
+ // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: test stubs only need id field; full CalibrationEntry would be verbose for this nextId unit test
186
+ const entries = [{ id: "shared-001" } as any, { id: "cal-002" } as any];
187
+ // oxlint-disable-next-line typescript/no-unsafe-argument -- Why: stub array; safe for nextId which only reads .id
188
+ expect(nextId(entries)).toBe("cal-003");
189
+ });
190
+ });
191
+
192
+ describe("calibration: formatCalibrationPreamble", () => {
193
+ it("returns empty string for no entries", () => {
194
+ expect(formatCalibrationPreamble([])).toBe("");
195
+ });
196
+
197
+ it("formats entries as markdown preamble", () => {
198
+ const entries: CalibrationEntry[] = [
199
+ {
200
+ id: "cal-001",
201
+ pattern: "missing auth",
202
+ context: "localhost-only server",
203
+ originalSeverity: "warning",
204
+ adjustedSeverity: "suggestion",
205
+ observedCount: 4,
206
+ lastConfirmed: "2026-06-28",
207
+ createdAt: "2026-06-01",
208
+ },
209
+ ];
210
+ const preamble = formatCalibrationPreamble(entries);
211
+ expect(preamble).toContain("Learned Calibration");
212
+ expect(preamble).toContain('"missing auth" in localhost-only server');
213
+ expect(preamble).toContain("suggestion (not warning)");
214
+ expect(preamble).toContain("Confirmed 4x");
215
+ });
216
+ });
217
+
218
+ describe("calibration: writeCalibration", () => {
219
+ it("creates directory and writes YAML file", () => {
220
+ const writeRoot = path.join(TEST_ROOT, "write-test");
221
+ mkdirSync(writeRoot, { recursive: true });
222
+ const today = new Date().toISOString().split("T")[0];
223
+
224
+ writeCalibration(writeRoot, {
225
+ version: 1,
226
+ settings: { expiryDays: 30 },
227
+ entries: [
228
+ {
229
+ id: "cal-001",
230
+ pattern: "test pattern",
231
+ context: "test context",
232
+ originalSeverity: "warning",
233
+ adjustedSeverity: "suggestion",
234
+ observedCount: 1,
235
+ lastConfirmed: today,
236
+ createdAt: today,
237
+ },
238
+ ],
239
+ });
240
+
241
+ const filePath = path.join(writeRoot, ".ai", "deepreview", "calibration.yml");
242
+ expect(existsSync(filePath)).toBe(true);
243
+
244
+ // Verify we can read it back
245
+ const result = loadCalibration(writeRoot);
246
+ expect(result.active).toHaveLength(1);
247
+ expect(result.active[0].pattern).toBe("test pattern");
248
+ });
249
+ });
@@ -0,0 +1,212 @@
1
+ /**
2
+ * Per-project calibration persistence for deepreview severity adjustments.
3
+ *
4
+ * Reads/writes .ai/deepreview/calibration.yml (local) and the calibration
5
+ * section of .deepreview.yml (shared). Handles merge logic, expiry, and
6
+ * preamble formatting for reviewer injection.
7
+ */
8
+
9
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
10
+ import path from "node:path";
11
+ import { dump as dumpYaml, load as loadYaml } from "js-yaml";
12
+
13
/** The severity tiers a deepreview finding can be assigned. */
export type Severity =
  | "critical"
  | "warning"
  | "suggestion";
15
+
16
/**
 * One recorded, recurring severity adjustment.
 *
 * Entries are matched across the local and shared files by `pattern` + `context`.
 */
export interface CalibrationEntry {
  /** Unique identifier, e.g. "cal-001". */
  id: string;
  /** Short description of the finding category. */
  pattern: string;
  /** Project-specific context that makes this adjustment valid. */
  context: string;
  /** Severity the reviewer originally assigned. */
  originalSeverity: Severity;
  /** Severity the validator adjusted it to. */
  adjustedSeverity: Severity;
  /** Number of times this adjustment has been observed. */
  observedCount: number;
  /** ISO date on which a validator last made this same adjustment; drives expiry. */
  lastConfirmed: string;
  /** ISO date on which the entry was first created. */
  createdAt: string;
}
35
+
36
/** Tunable knobs controlling calibration behavior. */
export interface CalibrationSettings {
  /** Number of days an entry may go unconfirmed before it expires (default: 30). */
  expiryDays: number;
}
41
+
42
/** Shape of the whole calibration file as written to and read from disk. */
export interface CalibrationData {
  /** Format version; only `1` is defined. */
  version: 1;
  /** Optional settings block. */
  settings?: CalibrationSettings;
  /** All recorded calibration entries. */
  entries: CalibrationEntry[];
}
48
+
49
/**
 * The parts of the shared `.deepreview.yml` file that this module is typed against.
 * Only the `calibration` section is consumed here.
 */
interface SharedConfig {
  /** Declared for shape completeness. */
  threatModel?: string;
  /** Team-shared calibration section; every part of it is optional. */
  calibration?: {
    /** Partial settings; any field may be omitted. */
    settings?: Partial<CalibrationSettings>;
    /** Shared calibration entries. */
    entries?: CalibrationEntry[];
  };
}
56
+
57
/** Expiry window, in days, used when neither the local nor the shared file sets one. */
const DEFAULT_EXPIRY_DAYS = 30;

/** Local calibration file, relative to the repository root. */
const LOCAL_PATH = ".ai/deepreview/calibration.yml";

/** Shared deepreview config file, relative to the repository root. */
const SHARED_PATH = ".deepreview.yml";
60
+
61
/**
 * Decide whether a calibration entry has gone too long without being re-confirmed.
 *
 * @param entry - Entry whose `lastConfirmed` date is inspected.
 * @param expiryDays - Maximum allowed age in days; an entry strictly older than this is expired.
 * @param now - Reference instant for the age calculation. Defaults to the current time.
 * @returns `true` when the entry is older than `expiryDays`, or when `lastConfirmed`
 *   cannot be parsed as a date.
 */
function isExpired(entry: CalibrationEntry, expiryDays: number, now: Date = new Date()): boolean {
  const msPerDay = 1000 * 60 * 60 * 24;
  const confirmedAtMs = new Date(entry.lastConfirmed).getTime();

  // An unparseable date yields NaN, and every comparison against NaN is false.
  // Without this guard a malformed entry would never expire.
  if (Number.isNaN(confirmedAtMs)) {
    return true;
  }

  const ageDays = (now.getTime() - confirmedAtMs) / msPerDay;
  return ageDays > expiryDays;
}
68
+
69
/**
 * Read and parse the local calibration YAML file.
 *
 * The file is user-editable, so its top-level shape is checked before use.
 *
 * @param filePath - Absolute or relative path to the calibration file.
 * @returns The parsed data with `entries` guaranteed to be an array, or `null` when
 *   the file is missing, unreadable, not valid YAML, or not a YAML mapping.
 */
function parseLocalFile(filePath: string): CalibrationData | null {
  if (!existsSync(filePath)) {
    return null;
  }
  try {
    const parsed: unknown = loadYaml(readFileSync(filePath, "utf-8"));
    // Arrays also report typeof "object"; a top-level list is not a calibration file.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return null;
    }
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: loadYaml returns unknown; top-level shape checked above, entries checked below
    const data = parsed as CalibrationData;
    // A missing or non-list `entries` value is normalized so callers can iterate safely.
    if (!Array.isArray(data.entries)) {
      return { ...data, entries: [] };
    }
    return data;
  } catch {
    // Best-effort read: an unreadable or malformed file is treated as absent.
    return null;
  }
}
85
+
86
/**
 * Read the `calibration` section of the shared config file.
 *
 * @param filePath - Path to the shared YAML config.
 * @returns The section's settings (if any) and its entries. `entries` is always an
 *   array; it is empty when the file is missing, unreadable, malformed, or has no
 *   usable `calibration` section.
 */
function parseSharedFile(filePath: string): {
  settings?: Partial<CalibrationSettings>;
  entries: CalibrationEntry[];
} {
  if (!existsSync(filePath)) {
    return { entries: [] };
  }
  try {
    const parsed: unknown = loadYaml(readFileSync(filePath, "utf-8"));
    if (parsed === null || typeof parsed !== "object") {
      return { entries: [] };
    }
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: loadYaml returns unknown; section shape checked below
    const { calibration } = parsed as SharedConfig;
    // An empty `calibration:` key parses to null; handle it explicitly instead of
    // relying on the catch block to absorb the resulting TypeError.
    if (calibration === undefined || calibration === null || typeof calibration !== "object") {
      return { entries: [] };
    }
    return {
      settings: calibration.settings ?? undefined,
      // A non-list `entries` value is treated as "no entries".
      entries: Array.isArray(calibration.entries) ? calibration.entries : [],
    };
  } catch {
    // Best-effort read: an unreadable or malformed file contributes nothing.
    return { entries: [] };
  }
}
108
+
109
/**
 * Build the merge key that identifies an entry by its `pattern` and `context`.
 *
 * The pair is JSON-encoded so the boundary between the two fields is unambiguous.
 * A plain separator string could also occur inside either field and make two
 * different pairs collide.
 *
 * @param entry - Entry to derive the key from.
 * @returns A string that is equal for two entries exactly when both fields match.
 */
function entryKey(entry: CalibrationEntry): string {
  return JSON.stringify([entry.pattern, entry.context]);
}
112
+
113
/**
 * Load calibration entries from the local and shared files, merge them, and split
 * the result into active and expired entries.
 *
 * Merge precedence: a local entry replaces a shared entry that has the same
 * pattern + context. Settings precedence: local > shared > default (30 days);
 * an `expiryDays` value that is not a finite number is ignored.
 *
 * @param repoRoot - Repository root that both file paths are resolved against.
 * @returns `active` entries still within the expiry window and `expired` entries
 *   outside it.
 */
export function loadCalibration(repoRoot: string): {
  active: CalibrationEntry[];
  expired: CalibrationEntry[];
} {
  const local = parseLocalFile(path.join(repoRoot, LOCAL_PATH));
  const shared = parseSharedFile(path.join(repoRoot, SHARED_PATH));

  // Both files are hand-editable YAML, so a setting such as `expiryDays: never`
  // can arrive as a string. Comparing against it would silently disable expiry.
  const isUsableDays = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value);
  const expiryDays =
    [local?.settings?.expiryDays, shared.settings?.expiryDays].find(isUsableDays) ??
    DEFAULT_EXPIRY_DAYS;

  // Shared entries go in first so that local entries with the same key overwrite them.
  const merged = new Map<string, CalibrationEntry>();
  for (const source of [shared.entries, local?.entries]) {
    if (!Array.isArray(source)) {
      continue;
    }
    for (const entry of source) {
      // An empty YAML list item parses to null; skip anything that is not an object.
      if (entry === null || typeof entry !== "object") {
        continue;
      }
      merged.set(entryKey(entry), entry);
    }
  }

  const active: CalibrationEntry[] = [];
  const expired: CalibrationEntry[] = [];
  for (const entry of merged.values()) {
    (isExpired(entry, expiryDays) ? expired : active).push(entry);
  }

  return { active, expired };
}
155
+
156
/**
 * Render calibration entries as a markdown preamble for reviewer injection.
 *
 * @param entries - Entries to list, one bullet per entry, in the given order.
 * @returns The preamble text ending in a newline, or an empty string when
 *   `entries` is empty.
 */
export function formatCalibrationPreamble(entries: CalibrationEntry[]): string {
  if (entries.length === 0) {
    return "";
  }

  const header = [
    "## Learned Calibration (from prior review sessions)",
    "",
    "The following patterns have been consistently downgraded by validators in this",
    "project. Adjust your severity accordingly — do not inflate these categories:",
    "",
  ];

  const bullets = entries.map(
    ({ pattern, context, adjustedSeverity, originalSeverity, observedCount }) =>
      `- "${pattern}" in ${context} → ${adjustedSeverity} (not ${originalSeverity}). Confirmed ${observedCount}x.`,
  );

  return [...header, ...bullets].join("\n").concat("\n");
}
181
+
182
/**
 * Write calibration data to the local calibration file
 * (`.ai/deepreview/calibration.yml` under `repoRoot`).
 *
 * The parent directory is created when missing. The file content is a one-line
 * header comment followed by the YAML dump of `data`.
 *
 * @param repoRoot - Repository root the calibration path is resolved against.
 * @param data - Calibration data to serialize.
 */
export function writeCalibration(repoRoot: string, data: CalibrationData): void {
  // Derive the target from LOCAL_PATH so the write path cannot drift from the read path.
  const filePath = path.join(repoRoot, LOCAL_PATH);

  // With `recursive: true` mkdirSync does nothing when the directory already exists,
  // so no separate existence check is needed.
  mkdirSync(path.dirname(filePath), { recursive: true });

  const yaml = dumpYaml(data, { lineWidth: 120, noRefs: true });
  writeFileSync(filePath, `# Auto-maintained by deepreview. User-editable.\n${yaml}`);
}
195
+
196
/**
 * Produce the next sequential calibration entry ID.
 *
 * IDs of the exact form "cal-NNN" are parsed; any other ID is ignored. The result
 * is "cal-" followed by the highest number found plus one, zero-padded to at
 * least three digits ("cal-001" when nothing matches).
 *
 * @param existing - Entries whose `id` fields are scanned.
 * @returns The next ID.
 */
export function nextId(existing: CalibrationEntry[]): string {
  const idPattern = /^cal-(\d+)$/u;

  const highest = existing.reduce((best, { id }) => {
    const digits = idPattern.exec(id)?.[1];
    return digits === undefined ? best : Math.max(best, Number.parseInt(digits, 10));
  }, 0);

  return `cal-${(highest + 1).toString().padStart(3, "0")}`;
}
@@ -8,6 +8,7 @@
8
8
  import { existsSync, readFileSync } from "node:fs";
9
9
  import path from "node:path";
10
10
  import { load as loadYaml } from "js-yaml";
11
+ import type { Severity } from "./calibration";
11
12
 
12
13
  export interface ProjectMetadata {
13
14
  /** Semantic version (e.g., "0.1.0", "3.2.1") */
@@ -28,6 +29,20 @@ export interface DeepReviewConfig {
28
29
  threatModel?: "localhost-only" | "internal-network" | "public-facing" | "library";
29
30
  /** Additional context hints for reviewers */
30
31
  context?: string;
32
+ /** Shared calibration entries and settings (opt-in team calibration) */
33
+ calibration?: {
34
+ settings?: { expiryDays?: number };
35
+ entries?: Array<{
36
+ id: string;
37
+ pattern: string;
38
+ context: string;
39
+ originalSeverity: Severity;
40
+ adjustedSeverity: Severity;
41
+ observedCount: number;
42
+ lastConfirmed: string;
43
+ createdAt: string;
44
+ }>;
45
+ };
31
46
  }
32
47
 
33
48
  interface PackageJson {