@mechanai/deepreview 2.14.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/agents/deepreview-plan-validator.md +10 -2
- package/.opencode/agents/deepreview-planner.md +30 -7
- package/.opencode/commands/deepreview-loop.md +50 -5
- package/.opencode/commands/deepreview-spec-loop.md +52 -7
- package/.opencode/plugins/deepreview.ts +62 -0
- package/README.md +59 -0
- package/package.json +1 -1
- package/src/calibration.test.ts +249 -0
- package/src/calibration.ts +212 -0
- package/src/project-context.ts +15 -0
|
@@ -58,9 +58,9 @@ Write your validated plan to the output path provided. Use this structure:
|
|
|
58
58
|
|
|
59
59
|
## Fix Plan
|
|
60
60
|
|
|
61
|
-
### Fix [N]: [Issue Title]
|
|
61
|
+
### Fix [N]: [Issue Title] (full format — critical/warning)
|
|
62
62
|
**File(s):** path/to/file:line
|
|
63
|
-
**Priority:** critical | warning
|
|
63
|
+
**Priority:** critical | warning
|
|
64
64
|
**Validation:** approved | revised | rejected
|
|
65
65
|
**Validation notes:** [1-2 sentences: what was checked, what was found]
|
|
66
66
|
**Approach:** [original or revised approach]
|
|
@@ -68,6 +68,14 @@ Write your validated plan to the output path provided. Use this structure:
|
|
|
68
68
|
[Original code if approved, corrected code if revised, "[rejected — see validation notes]" if rejected]
|
|
69
69
|
**Verification:** [from original plan]
|
|
70
70
|
|
|
71
|
+
### Fix [N]: [Issue Title] (compact format — suggestion)
|
|
72
|
+
**File(s):** path/to/file:line
|
|
73
|
+
**Priority:** suggestion
|
|
74
|
+
**Validation:** approved | revised | rejected
|
|
75
|
+
**Validation notes:** [1-2 sentences: what was checked, what was found]
|
|
76
|
+
**Change:** [Original or corrected instruction; "[rejected — see validation notes]" if rejected]
|
|
77
|
+
**Verification:** [from original plan, if present]
|
|
78
|
+
|
|
71
79
|
## Order of Operations
|
|
72
80
|
[Revised if any fixes were rejected or reordering is needed]
|
|
73
81
|
|
|
@@ -25,19 +25,22 @@ You will receive a path to a synthesis file. Read it.
|
|
|
25
25
|
|
|
26
26
|
## Documentation Drift handling
|
|
27
27
|
|
|
28
|
-
If the synthesis contains a "Documentation Drift" section with a batched checklist, consolidate all those items into a **single** fix entry in the plan. Do not create separate fix entries for each documentation item. Use this format:
|
|
28
|
+
If the synthesis contains a "Documentation Drift" section with a batched checklist, consolidate all those items into a **single** fix entry in the plan using compact format. Do not create separate fix entries for each documentation item. Use this format:
|
|
29
29
|
|
|
30
30
|
```
|
|
31
31
|
### Fix [N]: Documentation Updates
|
|
32
|
+
|
|
32
33
|
**File(s):** [list all affected files]
|
|
33
34
|
**Priority:** suggestion
|
|
34
|
-
**
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
**Change:**
|
|
36
|
+
|
|
37
|
+
- path/to/file1:42 — Replace "old text" with "new text"
|
|
38
|
+
- path/to/file2:18 — Delete stale comment
|
|
39
|
+
- path/to/file3:55 — Replace "old description" with "new description"
|
|
37
40
|
**Verification:** Confirm updated docs match current code behavior
|
|
38
41
|
```
|
|
39
42
|
|
|
40
|
-
Critical documentation findings (which appear individually in the "Critical Issues" section, not in "Documentation Drift") should still get their own fix entries.
|
|
43
|
+
Critical documentation findings (which appear individually in the "Critical Issues" section, not in "Documentation Drift") should still get their own fix entries using full format.
|
|
41
44
|
|
|
42
45
|
## Quality rules
|
|
43
46
|
|
|
@@ -45,6 +48,15 @@ Critical documentation findings (which appear individually in the "Critical Issu
|
|
|
45
48
|
- **Stay within scope.** Only fix what the synthesis identifies. Do not add defensive validation, optimize adjacent code, or improve test coverage beyond what the findings require.
|
|
46
49
|
- **Concrete, not aspirational.** Every code change must be copy-pasteable. No pseudocode, no "something like this", no TODOs.
|
|
47
50
|
|
|
51
|
+
## Format selection
|
|
52
|
+
|
|
53
|
+
Select the output format for each fix based on its priority:
|
|
54
|
+
|
|
55
|
+
- **Full format** for `critical` and `warning` fixes — include Approach and Code change blocks
|
|
56
|
+
- **Compact format** for `suggestion` fixes — use a one-line Change instruction instead (e.g., "Replace X with Y", "Delete line N", "Add X after line N")
|
|
57
|
+
|
|
58
|
+
For batched documentation fixes, the Change field uses a bullet list (one instruction per location).
|
|
59
|
+
|
|
48
60
|
## Output format
|
|
49
61
|
|
|
50
62
|
Write your implementation plan to the output path provided. Use this structure:
|
|
@@ -53,22 +65,33 @@ Write your implementation plan to the output path provided. Use this structure:
|
|
|
53
65
|
# Implementation Plan — [PR/branch] — [date]
|
|
54
66
|
|
|
55
67
|
## Summary
|
|
68
|
+
|
|
56
69
|
[What needs to be fixed and the estimated scope of work]
|
|
57
70
|
|
|
58
71
|
## Fix Plan
|
|
59
72
|
|
|
60
|
-
### Fix [N]: [Issue Title]
|
|
73
|
+
### Fix [N]: [Issue Title] (full format — critical/warning)
|
|
74
|
+
|
|
61
75
|
**File(s):** path/to/file:line
|
|
62
|
-
**Priority:** critical | warning
|
|
76
|
+
**Priority:** critical | warning
|
|
63
77
|
**Approach:** [what to change and why — 1-2 sentences]
|
|
64
78
|
**Code change:**
|
|
65
79
|
[Exact code to replace the problematic code. Use actual variable names, actual logic. Not pseudocode.]
|
|
66
80
|
**Verification:** [what to check after the fix — 1 sentence]
|
|
67
81
|
|
|
82
|
+
### Fix [N]: [Issue Title] (compact format — suggestion)
|
|
83
|
+
|
|
84
|
+
**File(s):** path/to/file:line
|
|
85
|
+
**Priority:** suggestion
|
|
86
|
+
**Change:** [One-line instruction: "Replace X with Y" or "Delete lines N-M" or "Add X after line N"]
|
|
87
|
+
**Verification:** [optional — omit if obvious]
|
|
88
|
+
|
|
68
89
|
## Order of Operations
|
|
90
|
+
|
|
69
91
|
[If fixes depend on each other, specify the order. Otherwise: "Fixes are independent — apply in any order."]
|
|
70
92
|
|
|
71
93
|
## Risk
|
|
94
|
+
|
|
72
95
|
[Any fixes that could introduce new issues and what to watch for]
|
|
73
96
|
```
|
|
74
97
|
|
|
@@ -18,6 +18,7 @@ Set ITERATION=1
|
|
|
18
18
|
Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
|
|
19
19
|
Set CONSECUTIVE_ZERO_NEW=0 (tracks consecutive iterations with 0 new findings for deadlock detection)
|
|
20
20
|
Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
|
|
21
|
+
Set EXPIRED_ENTRIES=[] (calibration entries that have expired — collected at start, used at end)
|
|
21
22
|
|
|
22
23
|
Determine REPO_ROOT — the main repository root (not a worktree root). Run:
|
|
23
24
|
`REPO_ROOT=$(realpath "$(git rev-parse --git-common-dir)" | sed 's|/\.git$||')`
|
|
@@ -30,6 +31,12 @@ Extract PROJECT_CONTEXT by detecting project metadata (version, deployment model
|
|
|
30
31
|
- Format as a calibration preamble with version info, deployment model, and guidance for severity adjustment
|
|
31
32
|
- If metadata extraction fails or no version info is found, set PROJECT_CONTEXT="" (empty string)
|
|
32
33
|
|
|
34
|
+
Load learned calibration by calling the `deepreview-calibration-load` tool (no arguments needed — it uses the working directory). Parse the JSON response:
|
|
35
|
+
|
|
36
|
+
- If `preamble` is non-empty, append it to PROJECT_CONTEXT (after the static calibration guidelines)
|
|
37
|
+
- If `expired` is non-empty, store it in EXPIRED_ENTRIES for end-of-session prompting
|
|
38
|
+
- If the tool fails or returns empty, proceed without calibration (do not STOP)
|
|
39
|
+
|
|
33
40
|
Build PRIOR_CONTEXT:
|
|
34
41
|
|
|
35
42
|
- Start with PROJECT_CONTEXT (if non-empty)
|
|
@@ -62,7 +69,7 @@ NOVELTY MODE (iter2+ only):
|
|
|
62
69
|
A) CONVERGENCE EXIT: If `0 new AND 0 regression`:
|
|
63
70
|
|
|
64
71
|
- Tell the user: "deepreview-loop converged after $ITERATION iteration(s). No new findings detected."
|
|
65
|
-
- STOP.
|
|
72
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
66
73
|
|
|
67
74
|
B) DEADLOCK (synthesizer signal): If `0 new AND N recurring (N > 0) AND 0 regression` for 2 consecutive iterations:
|
|
68
75
|
|
|
@@ -95,7 +102,7 @@ Compare this iteration's findings (file:line + issue title) against the previous
|
|
|
95
102
|
If the synthesis/review has 0 critical AND 0 warning AND 0 suggestion findings:
|
|
96
103
|
|
|
97
104
|
- Tell the user: "deepreview-loop complete after $ITERATION iteration(s). No findings remain."
|
|
98
|
-
- STOP.
|
|
105
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
99
106
|
|
|
100
107
|
STEP 4: APPLY ALL FIXES
|
|
101
108
|
Dispatch the applier automatically — do NOT ask the user for permission.
|
|
@@ -130,7 +137,7 @@ If ITERATION > 5:
|
|
|
130
137
|
- Tell the user: "deepreview-loop hit iteration limit (5). Remaining findings may require manual intervention or a design decision."
|
|
131
138
|
- Show the latest stats.
|
|
132
139
|
- Ask the user: "Continue for more iterations, or stop here?"
|
|
133
|
-
- If user says stop → STOP.
|
|
140
|
+
- If user says stop → Go to STEP 6 (calibration proposal), then STOP.
|
|
134
141
|
- If user says continue → reset limit to ITERATION + 5 and proceed.
|
|
135
142
|
|
|
136
143
|
Create new session directory: SESSION_DIR="$REPO_ROOT/.ai/deepreview/loop-iter$ITERATION-$(date +%Y-%m-%d-%H%M%S)"
|
|
@@ -309,10 +316,48 @@ If this task fails, emit a warning: "Plan validation failed — applying unvalid
|
|
|
309
316
|
|
|
310
317
|
Go to STEP 3.
|
|
311
318
|
|
|
319
|
+
STEP 6: PROPOSE CALIBRATION UPDATES (runs after any exit — clean exit, convergence, deadlock, or iteration limit)
|
|
320
|
+
|
|
321
|
+
Skip this step if ITERATION == 1 AND the exit was a clean exit (0 findings on first pass means nothing to calibrate).
|
|
322
|
+
|
|
323
|
+
Compare reviewer severity to synthesized severity from the LAST completed iteration:
|
|
324
|
+
|
|
325
|
+
1. Read the reviewer files ($SESSION_DIR/review-\*.md) from the last iteration
|
|
326
|
+
2. Read the synthesis file ($SESSION_DIR/synthesis.md) from the last iteration
|
|
327
|
+
3. For each finding in the synthesis, identify if ANY reviewer originally flagged it at a HIGHER severity
|
|
328
|
+
4. Only consider DOWNGRADES (reviewer said "critical" or "warning", synthesis says lower)
|
|
329
|
+
|
|
330
|
+
For each detected downgrade pattern:
|
|
331
|
+
|
|
332
|
+
- Check if it matches an existing calibration entry (an entry in EXPIRED_ENTRIES, or an `active` entry returned by `deepreview-calibration-load`)
|
|
333
|
+
- If yes: increment observedCount, set lastConfirmed to today
|
|
334
|
+
- If new: create a proposed entry with observedCount=1
|
|
335
|
+
|
|
336
|
+
If there are proposed updates (new or incremented entries) OR expired entries to remove:
|
|
337
|
+
|
|
338
|
+
- Call `deepreview-calibration-load` to get current entries
|
|
339
|
+
- Present to user:
|
|
340
|
+
|
|
341
|
+
```
|
|
342
|
+
Calibration update proposed:
|
|
343
|
+
|
|
344
|
+
- NEW: "[pattern]" in [context]: [originalSeverity] → [adjustedSeverity]
|
|
345
|
+
- UPDATED: "[pattern]" in [context]: observed N→N+1, re-confirmed
|
|
346
|
+
- EXPIRED: "[pattern]" (last confirmed N days ago) — will be removed
|
|
347
|
+
|
|
348
|
+
Accept these changes? [y/n/edit]
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
- If user approves: merge proposed entries with existing active entries (removing expired ones), then call `deepreview-calibration-save` with the merged entries
|
|
352
|
+
- If user says "edit": let them modify the proposal, then save
|
|
353
|
+
- If user rejects: skip saving
|
|
354
|
+
|
|
355
|
+
If no adjustments were detected and no entries expired, skip this step silently.
|
|
356
|
+
|
|
312
357
|
IMPORTANT RULES:
|
|
313
358
|
|
|
314
|
-
- Do NOT read any review/synthesis/plan files yourself.
|
|
315
|
-
- Use ONLY the file paths and stats/summary lines returned by subagents.
|
|
359
|
+
- Do NOT read any review/synthesis/plan files yourself during STEPS 1-5. Exception: STEP 6 (calibration proposal) requires reading reviewer and synthesis files to detect severity adjustments.
|
|
360
|
+
- Use ONLY the file paths and stats/summary lines returned by subagents (during STEPS 1-5).
|
|
316
361
|
- Apply ALL findings (critical, warning, AND suggestion) — the goal is a clean review.
|
|
317
362
|
- Do NOT ask the user for permission to apply fixes. Apply automatically.
|
|
318
363
|
- DO ask the user if: iteration limit is hit, deadlock is detected, verification fails, or diff size diverges.
|
|
@@ -14,6 +14,7 @@ STEP 1: DETERMINE INPUT
|
|
|
14
14
|
- Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
|
|
15
15
|
- Set CONSECUTIVE_ZERO_NEW=0 (tracks consecutive iterations with 0 new findings for deadlock detection)
|
|
16
16
|
- Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
|
|
17
|
+
- Set EXPIRED_ENTRIES=[] (calibration entries that have expired — collected at start, used at end)
|
|
17
18
|
- Determine REPO_ROOT — the main repository root (not a worktree root). Run:
|
|
18
19
|
`REPO_ROOT=$(realpath "$(git rev-parse --git-common-dir)" | sed 's|/\.git$||')`
|
|
19
20
|
|
|
@@ -25,6 +26,12 @@ Extract PROJECT_CONTEXT by detecting project metadata (version, deployment model
|
|
|
25
26
|
- Format as a calibration preamble with version info, deployment model, and guidance for severity adjustment
|
|
26
27
|
- If metadata extraction fails or no version info is found, set PROJECT_CONTEXT="" (empty string)
|
|
27
28
|
|
|
29
|
+
Load learned calibration by calling the `deepreview-calibration-load` tool (no arguments needed — it uses the working directory). Parse the JSON response:
|
|
30
|
+
|
|
31
|
+
- If `preamble` is non-empty, append it to PROJECT_CONTEXT (after the static calibration guidelines)
|
|
32
|
+
- If `expired` is non-empty, store it in EXPIRED_ENTRIES for end-of-session prompting
|
|
33
|
+
- If the tool fails or returns empty, proceed without calibration (do not STOP)
|
|
34
|
+
|
|
28
35
|
Build PRIOR_CONTEXT:
|
|
29
36
|
|
|
30
37
|
- Start with PROJECT_CONTEXT (if non-empty)
|
|
@@ -56,12 +63,12 @@ NOVELTY MODE (iter2+ only):
|
|
|
56
63
|
A) CLEAN EXIT: If 0 critical AND 0 warning AND 0 suggestion:
|
|
57
64
|
|
|
58
65
|
- Tell the user: "deepreview-spec-loop complete after $ITERATION iteration(s). No findings remain."
|
|
59
|
-
- STOP.
|
|
66
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
60
67
|
|
|
61
68
|
B) CONVERGENCE EXIT: If `0 new AND 0 regression`:
|
|
62
69
|
|
|
63
70
|
- Tell the user: "deepreview-spec-loop converged after $ITERATION iteration(s). No new findings detected. Remaining recurring findings (if any) reflect reviewer opinion differences."
|
|
64
|
-
- STOP.
|
|
71
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
65
72
|
|
|
66
73
|
C) DEADLOCK EXIT: If `0 new AND N recurring (N > 0) AND 0 regression` for 2 consecutive iterations:
|
|
67
74
|
|
|
@@ -94,13 +101,13 @@ Track the total finding count (critical + warning + suggestion) for each iterati
|
|
|
94
101
|
A) CLEAN EXIT: If 0 critical AND 0 warning AND 0 suggestion:
|
|
95
102
|
|
|
96
103
|
- Tell the user: "deepreview-spec-loop complete after $ITERATION iteration(s). No findings remain."
|
|
97
|
-
- STOP.
|
|
104
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
98
105
|
|
|
99
106
|
B) PLATEAU EXIT: If ITERATION >= 3 and the total has not decreased compared to the minimum of any previous iteration for 2 consecutive iterations:
|
|
100
107
|
|
|
101
108
|
- Tell the user: "deepreview-spec-loop plateau after $ITERATION iteration(s). Findings are oscillating (history: [list totals]) and not converging."
|
|
102
109
|
- Show the latest stats breakdown.
|
|
103
|
-
- STOP.
|
|
110
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
104
111
|
|
|
105
112
|
STEP 4: APPLY ALL FIXES
|
|
106
113
|
Dispatch the applier automatically — do NOT ask the user for permission.
|
|
@@ -118,7 +125,7 @@ If ITERATION > 7:
|
|
|
118
125
|
|
|
119
126
|
- Tell the user: "deepreview-spec-loop hit iteration limit (7). This should not normally happen — convergence or deadlock detection should have stopped earlier."
|
|
120
127
|
- Show the latest stats.
|
|
121
|
-
- STOP.
|
|
128
|
+
- Go to STEP 6 (calibration proposal), then STOP.
|
|
122
129
|
|
|
123
130
|
Create new session directory: SESSION_DIR="$REPO_ROOT/.ai/deepreview/spec-loop-iter$ITERATION-$(date +%Y-%m-%d-%H%M%S)"
|
|
124
131
|
Run `mkdir -p $SESSION_DIR`
|
|
@@ -250,10 +257,48 @@ If this task fails, emit a warning: "Plan validation failed — applying unvalid
|
|
|
250
257
|
|
|
251
258
|
Go to STEP 3.
|
|
252
259
|
|
|
260
|
+
STEP 6: PROPOSE CALIBRATION UPDATES (runs after any exit — clean exit, convergence, plateau, or iteration limit)
|
|
261
|
+
|
|
262
|
+
Skip this step if ITERATION == 1 AND the exit was a clean exit (0 findings on first pass means nothing to calibrate).
|
|
263
|
+
|
|
264
|
+
Compare reviewer severity to synthesized severity from the LAST completed iteration:
|
|
265
|
+
|
|
266
|
+
1. Read the reviewer files ($SESSION_DIR/review-\*.md) from the last iteration
|
|
267
|
+
2. Read the synthesis file ($SESSION_DIR/synthesis.md) from the last iteration
|
|
268
|
+
3. For each finding in the synthesis, identify if ANY reviewer originally flagged it at a HIGHER severity
|
|
269
|
+
4. Only consider DOWNGRADES (reviewer said "critical" or "warning", synthesis says lower)
|
|
270
|
+
|
|
271
|
+
For each detected downgrade pattern:
|
|
272
|
+
|
|
273
|
+
- Check if it matches an existing calibration entry (an entry in EXPIRED_ENTRIES, or an `active` entry returned by `deepreview-calibration-load`)
|
|
274
|
+
- If yes: increment observedCount, set lastConfirmed to today
|
|
275
|
+
- If new: create a proposed entry with observedCount=1
|
|
276
|
+
|
|
277
|
+
If there are proposed updates (new or incremented entries) OR expired entries to remove:
|
|
278
|
+
|
|
279
|
+
- Call `deepreview-calibration-load` to get current entries
|
|
280
|
+
- Present to user:
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
Calibration update proposed:
|
|
284
|
+
|
|
285
|
+
- NEW: "[pattern]" in [context]: [originalSeverity] → [adjustedSeverity]
|
|
286
|
+
- UPDATED: "[pattern]" in [context]: observed N→N+1, re-confirmed
|
|
287
|
+
- EXPIRED: "[pattern]" (last confirmed N days ago) — will be removed
|
|
288
|
+
|
|
289
|
+
Accept these changes? [y/n/edit]
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
- If user approves: merge proposed entries with existing active entries (removing expired ones), then call `deepreview-calibration-save` with the merged entries
|
|
293
|
+
- If user says "edit": let them modify the proposal, then save
|
|
294
|
+
- If user rejects: skip saving
|
|
295
|
+
|
|
296
|
+
If no adjustments were detected and no entries expired, skip this step silently.
|
|
297
|
+
|
|
253
298
|
IMPORTANT RULES:
|
|
254
299
|
|
|
255
|
-
- Do NOT read any review/synthesis/plan files yourself.
|
|
256
|
-
- Use ONLY the file paths and stats/summary lines returned by subagents.
|
|
300
|
+
- Do NOT read any review/synthesis/plan files yourself during STEPS 1-5. Exception: STEP 6 (calibration proposal) requires reading reviewer and synthesis files to detect severity adjustments.
|
|
301
|
+
- Use ONLY the file paths and stats/summary lines returned by subagents (during STEPS 1-5).
|
|
257
302
|
- Apply ALL findings (critical, warning, AND suggestion) — the goal is a clean review.
|
|
258
303
|
- Do NOT ask the user for permission to apply fixes. Apply automatically.
|
|
259
304
|
- DO ask the user if iteration limit is hit, divergence is detected, or deadlock is detected.
|
|
@@ -1,6 +1,33 @@
|
|
|
1
1
|
import { type Plugin, type PluginInput, tool } from "@opencode-ai/plugin";
|
|
2
|
+
import { execSync } from "node:child_process";
|
|
3
|
+
import { resolve } from "node:path";
|
|
2
4
|
import { postReview } from "../../src/post-review.ts";
|
|
3
5
|
import { buildPriorReview } from "../../src/build-prior-review.ts";
|
|
6
|
+
import {
|
|
7
|
+
type CalibrationEntry,
|
|
8
|
+
type CalibrationSettings,
|
|
9
|
+
loadCalibration,
|
|
10
|
+
formatCalibrationPreamble,
|
|
11
|
+
writeCalibration,
|
|
12
|
+
} from "../../src/calibration.ts";
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Resolve the main repository root (not a worktree root) from a working directory.
|
|
16
|
+
* Falls back to the given directory if git resolution fails.
|
|
17
|
+
*/
|
|
18
|
+
function resolveRepoRoot(cwd: string): string {
|
|
19
|
+
try {
|
|
20
|
+
const gitCommonDir = execSync("git rev-parse --git-common-dir", {
|
|
21
|
+
cwd,
|
|
22
|
+
encoding: "utf-8",
|
|
23
|
+
}).trim();
|
|
24
|
+
// git-common-dir returns the path to .git (or the shared .git dir for worktrees).
|
|
25
|
+
// It may be relative, so resolve against cwd, then strip trailing /.git for the repo root.
|
|
26
|
+
return resolve(cwd, gitCommonDir).replace(/\/\.git$/u, "");
|
|
27
|
+
} catch {
|
|
28
|
+
return cwd;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
4
31
|
|
|
5
32
|
// oxlint-disable-next-line require-await, max-lines-per-function -- Why: Plugin type signature requires async but this plugin has no async initialization; function is long due to tool registrations with schema definitions
|
|
6
33
|
export const server: Plugin = async (_input: PluginInput) => {
|
|
@@ -69,6 +96,41 @@ export const server: Plugin = async (_input: PluginInput) => {
|
|
|
69
96
|
}
|
|
70
97
|
},
|
|
71
98
|
}),
|
|
99
|
+
"deepreview-calibration-load": tool({
|
|
100
|
+
description:
|
|
101
|
+
"Load per-project calibration entries (learned severity adjustments from prior " +
|
|
102
|
+
"review sessions). Returns active entries, expired entries needing re-confirmation, " +
|
|
103
|
+
"and a formatted preamble for reviewer injection.",
|
|
104
|
+
args: {},
|
|
105
|
+
async execute(_args, context) {
|
|
106
|
+
const repoRoot = resolveRepoRoot(context.directory);
|
|
107
|
+
const { active, expired } = loadCalibration(repoRoot);
|
|
108
|
+
const preamble = formatCalibrationPreamble(active);
|
|
109
|
+
return JSON.stringify({ active, expired, preamble });
|
|
110
|
+
},
|
|
111
|
+
}),
|
|
112
|
+
"deepreview-calibration-save": tool({
|
|
113
|
+
description:
|
|
114
|
+
"Save calibration entries to .ai/deepreview/calibration.yml (local, unversioned). " +
|
|
115
|
+
"Always writes to local — never modifies .deepreview.yml.",
|
|
116
|
+
args: {
|
|
117
|
+
entries: tool.schema.string().describe("JSON array of CalibrationEntry objects to save"),
|
|
118
|
+
expiry_days: tool.schema
|
|
119
|
+
.number()
|
|
120
|
+
.int()
|
|
121
|
+
.positive()
|
|
122
|
+
.optional()
|
|
123
|
+
.describe("Expiry window in days (default: 30)"),
|
|
124
|
+
},
|
|
125
|
+
async execute(args, context) {
|
|
126
|
+
const repoRoot = resolveRepoRoot(context.directory);
|
|
127
|
+
// oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: JSON.parse returns any; schema is validated by the caller (orchestrator)
|
|
128
|
+
const entries = JSON.parse(args.entries) as CalibrationEntry[];
|
|
129
|
+
const settings: CalibrationSettings = { expiryDays: args.expiry_days ?? 30 };
|
|
130
|
+
writeCalibration(repoRoot, { version: 1, settings, entries });
|
|
131
|
+
return JSON.stringify({ written: `${repoRoot}/.ai/deepreview/calibration.yml` });
|
|
132
|
+
},
|
|
133
|
+
}),
|
|
72
134
|
},
|
|
73
135
|
};
|
|
74
136
|
};
|
package/README.md
CHANGED
|
@@ -83,6 +83,65 @@ graph LR
|
|
|
83
83
|
Stages communicate via files on disk — the orchestrator never reads review content into
|
|
84
84
|
its own context during the review loop (the end-of-session calibration step is the one exception), keeping token usage minimal.
|
|
85
85
|
|
|
86
|
+
## Calibration
|
|
87
|
+
|
|
88
|
+
deepreview learns from validator severity adjustments over time. When validators
|
|
89
|
+
consistently downgrade the same category of finding (e.g., "missing auth" in a
|
|
90
|
+
localhost-only tool), the system proposes calibration entries at the end of each
|
|
91
|
+
review session.
|
|
92
|
+
|
|
93
|
+
### How it works
|
|
94
|
+
|
|
95
|
+
1. **Session end:** The orchestrator compares reviewer severity to synthesized
|
|
96
|
+
(post-validation) severity
|
|
97
|
+
2. **Proposal:** Systematic downgrades are proposed as calibration entries
|
|
98
|
+
3. **User confirms:** You approve, edit, or reject the proposed changes
|
|
99
|
+
4. **Next session:** Approved calibration is injected into reviewer prompts,
|
|
100
|
+
reducing severity inflation
|
|
101
|
+
|
|
102
|
+
### Configuration
|
|
103
|
+
|
|
104
|
+
Local calibration (personal, gitignored):
|
|
105
|
+
|
|
106
|
+
```yaml
|
|
107
|
+
# .ai/deepreview/calibration.yml
|
|
108
|
+
version: 1
|
|
109
|
+
settings:
|
|
110
|
+
expiryDays: 30 # days before unconfirmed entries expire
|
|
111
|
+
entries:
|
|
112
|
+
- id: "cal-001"
|
|
113
|
+
pattern: "missing authentication"
|
|
114
|
+
context: "localhost-only server"
|
|
115
|
+
originalSeverity: "warning"
|
|
116
|
+
adjustedSeverity: "suggestion"
|
|
117
|
+
observedCount: 4
|
|
118
|
+
lastConfirmed: "2026-06-28"
|
|
119
|
+
createdAt: "2026-06-01"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Sharing calibration with your team
|
|
123
|
+
|
|
124
|
+
To share calibration entries, add them to `.deepreview.yml` under the `calibration:` key:
|
|
125
|
+
|
|
126
|
+
```yaml
|
|
127
|
+
# .deepreview.yml
|
|
128
|
+
threatModel: localhost-only
|
|
129
|
+
calibration:
|
|
130
|
+
settings:
|
|
131
|
+
expiryDays: 60
|
|
132
|
+
entries:
|
|
133
|
+
- id: "shared-001"
|
|
134
|
+
pattern: "missing authentication"
|
|
135
|
+
context: "localhost-only server"
|
|
136
|
+
originalSeverity: "warning"
|
|
137
|
+
adjustedSeverity: "suggestion"
|
|
138
|
+
observedCount: 4
|
|
139
|
+
lastConfirmed: "2026-06-28"
|
|
140
|
+
createdAt: "2026-06-01"
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Local entries override shared entries when both match the same `pattern` + `context`.
|
|
144
|
+
|
|
86
145
|
### Review agents
|
|
87
146
|
|
|
88
147
|
| Agent | Code review | Spec review |
|
package/package.json
CHANGED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
loadCalibration,
|
|
4
|
+
writeCalibration,
|
|
5
|
+
nextId,
|
|
6
|
+
formatCalibrationPreamble,
|
|
7
|
+
type CalibrationEntry,
|
|
8
|
+
} from "./calibration";
|
|
9
|
+
import { mkdirSync, writeFileSync, rmSync, existsSync } from "node:fs";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
|
|
12
|
+
const TEST_ROOT = path.join(import.meta.dirname, "..", "tmp-calibration-test");
|
|
13
|
+
|
|
14
|
+
beforeEach(() => {
|
|
15
|
+
mkdirSync(path.join(TEST_ROOT, ".ai", "deepreview"), { recursive: true });
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
afterEach(() => {
|
|
19
|
+
rmSync(TEST_ROOT, { recursive: true, force: true });
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
// oxlint-disable-next-line max-lines-per-function -- Why: comprehensive test coverage for loadCalibration requires inline YAML fixtures to keep each case self-contained; extracting fixtures would obscure the scenario being tested
|
|
23
|
+
describe("calibration: loadCalibration", () => {
|
|
24
|
+
it("returns empty when no calibration file exists", () => {
|
|
25
|
+
const emptyRoot = path.join(TEST_ROOT, "no-files");
|
|
26
|
+
mkdirSync(emptyRoot, { recursive: true });
|
|
27
|
+
const result = loadCalibration(emptyRoot);
|
|
28
|
+
expect(result.active).toEqual([]);
|
|
29
|
+
expect(result.expired).toEqual([]);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it("loads entries from local calibration file", () => {
|
|
33
|
+
const yaml = `
|
|
34
|
+
version: 1
|
|
35
|
+
settings:
|
|
36
|
+
expiryDays: 30
|
|
37
|
+
entries:
|
|
38
|
+
- id: "cal-001"
|
|
39
|
+
pattern: "missing auth"
|
|
40
|
+
context: "localhost-only server"
|
|
41
|
+
originalSeverity: "warning"
|
|
42
|
+
adjustedSeverity: "suggestion"
|
|
43
|
+
observedCount: 3
|
|
44
|
+
lastConfirmed: "${new Date().toISOString().split("T")[0]}"
|
|
45
|
+
createdAt: "2026-01-01"
|
|
46
|
+
`;
|
|
47
|
+
writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), yaml);
|
|
48
|
+
const result = loadCalibration(TEST_ROOT);
|
|
49
|
+
expect(result.active).toHaveLength(1);
|
|
50
|
+
expect(result.active[0].id).toBe("cal-001");
|
|
51
|
+
expect(result.expired).toHaveLength(0);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("filters expired entries based on expiryDays", () => {
|
|
55
|
+
const oldDate = "2020-01-01";
|
|
56
|
+
const yaml = `
|
|
57
|
+
version: 1
|
|
58
|
+
settings:
|
|
59
|
+
expiryDays: 30
|
|
60
|
+
entries:
|
|
61
|
+
- id: "cal-001"
|
|
62
|
+
pattern: "old pattern"
|
|
63
|
+
context: "stale context"
|
|
64
|
+
originalSeverity: "warning"
|
|
65
|
+
adjustedSeverity: "suggestion"
|
|
66
|
+
observedCount: 2
|
|
67
|
+
lastConfirmed: "${oldDate}"
|
|
68
|
+
createdAt: "2020-01-01"
|
|
69
|
+
`;
|
|
70
|
+
writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), yaml);
|
|
71
|
+
const result = loadCalibration(TEST_ROOT);
|
|
72
|
+
expect(result.active).toHaveLength(0);
|
|
73
|
+
expect(result.expired).toHaveLength(1);
|
|
74
|
+
expect(result.expired[0].id).toBe("cal-001");
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("merges shared and local entries (local wins on conflict)", () => {
|
|
78
|
+
const today = new Date().toISOString().split("T")[0];
|
|
79
|
+
const sharedYaml = `
|
|
80
|
+
threatModel: localhost-only
|
|
81
|
+
calibration:
|
|
82
|
+
settings:
|
|
83
|
+
expiryDays: 60
|
|
84
|
+
entries:
|
|
85
|
+
- id: "shared-001"
|
|
86
|
+
pattern: "missing auth"
|
|
87
|
+
context: "localhost-only server"
|
|
88
|
+
originalSeverity: "warning"
|
|
89
|
+
adjustedSeverity: "suggestion"
|
|
90
|
+
observedCount: 2
|
|
91
|
+
lastConfirmed: "${today}"
|
|
92
|
+
createdAt: "2026-01-01"
|
|
93
|
+
- id: "shared-002"
|
|
94
|
+
pattern: "stale docs"
|
|
95
|
+
context: "pre-1.0 project"
|
|
96
|
+
originalSeverity: "critical"
|
|
97
|
+
adjustedSeverity: "suggestion"
|
|
98
|
+
observedCount: 5
|
|
99
|
+
lastConfirmed: "${today}"
|
|
100
|
+
createdAt: "2026-01-01"
|
|
101
|
+
`;
|
|
102
|
+
const localYaml = `
|
|
103
|
+
version: 1
|
|
104
|
+
settings:
|
|
105
|
+
expiryDays: 30
|
|
106
|
+
entries:
|
|
107
|
+
- id: "cal-001"
|
|
108
|
+
pattern: "missing auth"
|
|
109
|
+
context: "localhost-only server"
|
|
110
|
+
originalSeverity: "warning"
|
|
111
|
+
adjustedSeverity: "suggestion"
|
|
112
|
+
observedCount: 5
|
|
113
|
+
lastConfirmed: "${today}"
|
|
114
|
+
createdAt: "2026-02-01"
|
|
115
|
+
`;
|
|
116
|
+
writeFileSync(path.join(TEST_ROOT, ".deepreview.yml"), sharedYaml);
|
|
117
|
+
writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), localYaml);
|
|
118
|
+
|
|
119
|
+
const result = loadCalibration(TEST_ROOT);
|
|
120
|
+
// "missing auth" + "localhost-only server" conflict: local wins (observedCount=5)
|
|
121
|
+
const authEntry = result.active.find((e) => e.pattern === "missing auth");
|
|
122
|
+
expect(authEntry?.observedCount).toBe(5);
|
|
123
|
+
expect(authEntry?.id).toBe("cal-001");
|
|
124
|
+
// "stale docs" comes from shared only
|
|
125
|
+
const docsEntry = result.active.find((e) => e.pattern === "stale docs");
|
|
126
|
+
expect(docsEntry?.id).toBe("shared-002");
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
it("handles malformed YAML gracefully", () => {
  // Unparseable content in the local calibration file must not throw;
  // the loader is expected to report no entries at all.
  const calibrationFile = path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml");
  writeFileSync(calibrationFile, "not: [valid: yaml: {{{");

  const { active, expired } = loadCalibration(TEST_ROOT);
  expect(active).toEqual([]);
  expect(expired).toEqual([]);
});
|
|
138
|
+
|
|
139
|
+
it("uses shared expiryDays when local has no settings", () => {
|
|
140
|
+
// Build a lastConfirmed date 60 days in the past: older than the 30-day default, but inside the shared 90-day window, so the entry stays active only if the shared setting is honored
|
|
141
|
+
const d = new Date();
|
|
142
|
+
d.setDate(d.getDate() - 60);
|
|
143
|
+
const oldDate = d.toISOString().split("T")[0];
|
|
144
|
+
const sharedYaml = `
|
|
145
|
+
threatModel: localhost-only
|
|
146
|
+
calibration:
|
|
147
|
+
settings:
|
|
148
|
+
expiryDays: 90
|
|
149
|
+
entries:
|
|
150
|
+
- id: "shared-001"
|
|
151
|
+
pattern: "some pattern"
|
|
152
|
+
context: "some context"
|
|
153
|
+
originalSeverity: "warning"
|
|
154
|
+
adjustedSeverity: "suggestion"
|
|
155
|
+
observedCount: 2
|
|
156
|
+
lastConfirmed: "${oldDate}"
|
|
157
|
+
createdAt: "2026-01-01"
|
|
158
|
+
`;
|
|
159
|
+
const localYaml = `
|
|
160
|
+
version: 1
|
|
161
|
+
entries: []
|
|
162
|
+
`;
|
|
163
|
+
writeFileSync(path.join(TEST_ROOT, ".deepreview.yml"), sharedYaml);
|
|
164
|
+
writeFileSync(path.join(TEST_ROOT, ".ai", "deepreview", "calibration.yml"), localYaml);
|
|
165
|
+
|
|
166
|
+
const result = loadCalibration(TEST_ROOT);
|
|
167
|
+
// The local file defines no settings, so the shared 90-day expiry applies and the ~60-day-old entry must remain active
|
|
168
|
+
expect(result.active).toHaveLength(1);
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
describe("calibration: nextId", () => {
  it("returns cal-001 for empty list", () => {
    expect(nextId([])).toBe("cal-001");
  });

  it("increments from highest existing ID", () => {
    // Stubs are asserted to CalibrationEntry rather than `any`, so the call
    // below stays type-checked and needs no unsafe-argument suppression.
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: test stubs only need id field; full CalibrationEntry would be verbose for this nextId unit test
    const entries = [{ id: "cal-003" } as CalibrationEntry, { id: "cal-001" } as CalibrationEntry];
    expect(nextId(entries)).toBe("cal-004");
  });

  it("handles non-numeric IDs gracefully", () => {
    // "shared-001" does not follow the cal-NNN scheme and must be ignored
    // when computing the next sequential id.
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: test stubs only need id field; full CalibrationEntry would be verbose for this nextId unit test
    const entries = [{ id: "shared-001" } as CalibrationEntry, { id: "cal-002" } as CalibrationEntry];
    expect(nextId(entries)).toBe("cal-003");
  });
});
|
|
191
|
+
|
|
192
|
+
describe("calibration: formatCalibrationPreamble", () => {
  it("returns empty string for no entries", () => {
    const preamble = formatCalibrationPreamble([]);
    expect(preamble).toBe("");
  });

  it("formats entries as markdown preamble", () => {
    const entry: CalibrationEntry = {
      id: "cal-001",
      pattern: "missing auth",
      context: "localhost-only server",
      originalSeverity: "warning",
      adjustedSeverity: "suggestion",
      observedCount: 4,
      lastConfirmed: "2026-06-28",
      createdAt: "2026-06-01",
    };

    const preamble = formatCalibrationPreamble([entry]);

    // Heading, quoted pattern with its context, severity change, and the
    // confirmation count must all be present in the rendered text.
    const expectedFragments = [
      "Learned Calibration",
      '"missing auth" in localhost-only server',
      "suggestion (not warning)",
      "Confirmed 4x",
    ];
    for (const fragment of expectedFragments) {
      expect(preamble).toContain(fragment);
    }
  });
});
|
|
217
|
+
|
|
218
|
+
describe("calibration: writeCalibration", () => {
  it("creates directory and writes YAML file", () => {
    // Only the root exists up front; the nested .ai/deepreview directory is
    // expected to be created by writeCalibration.
    const writeRoot = path.join(TEST_ROOT, "write-test");
    mkdirSync(writeRoot, { recursive: true });
    const today = new Date().toISOString().split("T")[0];

    const entry: CalibrationEntry = {
      id: "cal-001",
      pattern: "test pattern",
      context: "test context",
      originalSeverity: "warning",
      adjustedSeverity: "suggestion",
      observedCount: 1,
      lastConfirmed: today,
      createdAt: today,
    };
    writeCalibration(writeRoot, { version: 1, settings: { expiryDays: 30 }, entries: [entry] });

    const writtenFile = path.join(writeRoot, ".ai", "deepreview", "calibration.yml");
    expect(existsSync(writtenFile)).toBe(true);

    // Round-trip: the file just written must load back as one active entry.
    const { active } = loadCalibration(writeRoot);
    expect(active).toHaveLength(1);
    expect(active[0].pattern).toBe("test pattern");
  });
});
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-project calibration persistence for deepreview severity adjustments.
|
|
3
|
+
*
|
|
4
|
+
* Reads/writes .ai/deepreview/calibration.yml (local) and the calibration
|
|
5
|
+
* section of .deepreview.yml (shared). Handles merge logic, expiry, and
|
|
6
|
+
* preamble formatting for reviewer injection.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import { dump as dumpYaml, load as loadYaml } from "js-yaml";
|
|
12
|
+
|
|
13
|
+
/**
 * The severity a deepreview finding can carry.
 */
export type Severity =
  | "critical"
  | "warning"
  | "suggestion";
|
|
15
|
+
|
|
16
|
+
/**
 * One recorded severity adjustment: a finding category that was re-rated
 * in a particular project context.
 */
export interface CalibrationEntry {
  /** Identifier of the entry, e.g. "cal-001". */
  id: string;
  /** Brief label for the category of finding. */
  pattern: string;
  /** The project-specific circumstance under which the adjustment is valid. */
  context: string;
  /** Severity originally assigned by the reviewer. */
  originalSeverity: Severity;
  /** Severity the validator changed it to. */
  adjustedSeverity: Severity;
  /** Number of times this adjustment has been observed. */
  observedCount: number;
  /** ISO date on which a validator most recently made the same adjustment. */
  lastConfirmed: string;
  /** ISO date on which the entry was first created. */
  createdAt: string;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Tunable options that control how calibration behaves.
 */
export interface CalibrationSettings {
  /** Number of days an entry may go unconfirmed before it expires (default: 30). */
  expiryDays: number;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Shape of the calibration file as a whole.
 */
export interface CalibrationData {
  /** Format version; only `1` is defined. */
  version: 1;
  /** Optional behavior settings. */
  settings?: CalibrationSettings;
  /** The recorded severity adjustments. */
  entries: CalibrationEntry[];
}
|
|
48
|
+
|
|
49
|
+
/**
 * The parts of the shared `.deepreview.yml` file this module reads.
 */
interface SharedConfig {
  /** Threat-model value from the shared file; declared here but not read by this module's visible code. */
  threatModel?: string;
  /** Optional calibration section of the shared file. */
  calibration?: {
    /** Settings in the shared file; every field may be omitted. */
    settings?: Partial<CalibrationSettings>;
    /** Calibration entries stored in the shared file. */
    entries?: CalibrationEntry[];
  };
}
|
|
56
|
+
|
|
57
|
+
/** Expiry window, in days, used when neither config file supplies one. */
const DEFAULT_EXPIRY_DAYS = 30;

/** Repo-relative location of the local calibration file. */
const LOCAL_PATH = ".ai/deepreview/calibration.yml";

/** Repo-relative location of the shared config file. */
const SHARED_PATH = ".deepreview.yml";
|
|
60
|
+
|
|
61
|
+
/**
 * Decide whether a calibration entry has gone stale.
 *
 * @param entry - Entry whose `lastConfirmed` date is checked.
 * @param expiryDays - Maximum age, in days, before the entry expires.
 * @returns `true` when the entry was last confirmed more than `expiryDays`
 *   days ago, or when `lastConfirmed` cannot be parsed as a date.
 */
function isExpired(entry: CalibrationEntry, expiryDays: number): boolean {
  const lastConfirmedMs = new Date(entry.lastConfirmed).getTime();
  // An unparseable date yields NaN, and `NaN > expiryDays` is always false,
  // which would keep a corrupt entry active forever. Treat it as expired.
  if (Number.isNaN(lastConfirmedMs)) {
    return true;
  }
  const msPerDay = 1000 * 60 * 60 * 24;
  const ageDays = (Date.now() - lastConfirmedMs) / msPerDay;
  return ageDays > expiryDays;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Read and parse the local calibration YAML file.
 *
 * @param filePath - Path of the calibration file.
 * @returns The parsed data with `entries` normalised to an array of objects,
 *   or `null` when the file is missing, unreadable, not valid YAML, or its
 *   top-level value is not a mapping.
 */
function parseLocalFile(filePath: string): CalibrationData | null {
  if (!existsSync(filePath)) {
    return null;
  }
  try {
    const content = readFileSync(filePath, "utf-8");
    const raw: unknown = loadYaml(content);
    // Scalars, null and top-level sequences cannot be a calibration document.
    if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
      return null;
    }
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: loadYaml returns unknown; shape is narrowed above and entries are normalised below
    const data = raw as CalibrationData;
    // A hand-edited file may hold a scalar or mapping under `entries`, or
    // null items inside the list; callers iterate entries and read their
    // fields, so keep only object items and fall back to an empty list.
    const entries = Array.isArray(data.entries)
      ? data.entries.filter((entry) => entry !== null && typeof entry === "object")
      : [];
    return { ...data, entries };
  } catch {
    // Best-effort load: an unreadable or malformed file is treated as absent.
    return null;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Read the `calibration` section of the shared config file.
 *
 * @param filePath - Path of the shared config file.
 * @returns The section's settings (if any) and its entries normalised to an
 *   array of objects. Returns `{ entries: [] }` when the file is missing,
 *   unreadable, not valid YAML, or has no usable `calibration` mapping.
 */
function parseSharedFile(filePath: string): {
  settings?: Partial<CalibrationSettings>;
  entries: CalibrationEntry[];
} {
  if (!existsSync(filePath)) {
    return { entries: [] };
  }
  try {
    const content = readFileSync(filePath, "utf-8");
    // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Why: loadYaml returns unknown; validated by the checks below
    const data = loadYaml(content) as SharedConfig | null;
    const section = data?.calibration;
    // An empty `calibration:` key parses to null; handle it explicitly
    // instead of relying on the catch block to absorb a TypeError.
    if (section === undefined || section === null || typeof section !== "object") {
      return { entries: [] };
    }
    // Callers iterate entries and read their fields, so drop anything that
    // is not a list of objects.
    const entries = Array.isArray(section.entries)
      ? section.entries.filter((entry) => entry !== null && typeof entry === "object")
      : [];
    return { settings: section.settings, entries };
  } catch {
    // Best-effort load: an unreadable or malformed file contributes nothing.
    return { entries: [] };
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Build the key under which an entry is stored when sources are merged:
 * its pattern and context joined by "|||".
 */
function entryKey(entry: CalibrationEntry): string {
  const { pattern, context } = entry;
  return `${pattern}|||${context}`;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Read calibration entries from the local and shared files under `repoRoot`,
 * merge them, and split the result into active and expired entries.
 *
 * Entries are keyed by pattern + context; when both sources define the same
 * key, the local entry replaces the shared one. The expiry window is taken
 * from local settings, then shared settings, then the 30-day default.
 */
export function loadCalibration(repoRoot: string): {
  active: CalibrationEntry[];
  expired: CalibrationEntry[];
} {
  const local = parseLocalFile(path.join(repoRoot, LOCAL_PATH));
  const shared = parseSharedFile(path.join(repoRoot, SHARED_PATH));

  // Settings precedence: local > shared > default
  const localExpiry = local?.settings?.expiryDays;
  const expiryDays = localExpiry ?? shared.settings?.expiryDays ?? DEFAULT_EXPIRY_DAYS;

  // Shared entries go in first so that a later local entry with the same
  // key overwrites them.
  const byKey = new Map<string, CalibrationEntry>();
  for (const candidate of [...shared.entries, ...(local?.entries ?? [])]) {
    byKey.set(entryKey(candidate), candidate);
  }

  const active: CalibrationEntry[] = [];
  const expired: CalibrationEntry[] = [];
  for (const candidate of byKey.values()) {
    const bucket = isExpired(candidate, expiryDays) ? expired : active;
    bucket.push(candidate);
  }

  return { active, expired };
}
|
|
155
|
+
|
|
156
|
+
/**
 * Render calibration entries as a markdown section: a heading, a short
 * instruction paragraph, and one bullet per entry.
 *
 * @param entries - Entries to render.
 * @returns The markdown text ending in a newline, or "" when `entries` is empty.
 */
export function formatCalibrationPreamble(entries: CalibrationEntry[]): string {
  if (entries.length === 0) {
    return "";
  }

  const header = [
    "## Learned Calibration (from prior review sessions)",
    "",
    "The following patterns have been consistently downgraded by validators in this",
    "project. Adjust your severity accordingly — do not inflate these categories:",
    "",
  ];

  const bullets = entries.map(
    (e) =>
      `- "${e.pattern}" in ${e.context} → ${e.adjustedSeverity} (not ${e.originalSeverity}). Confirmed ${e.observedCount}x.`,
  );

  return [...header, ...bullets].join("\n") + "\n";
}
|
|
181
|
+
|
|
182
|
+
/**
 * Serialise calibration data to YAML and store it at
 * `<repoRoot>/.ai/deepreview/calibration.yml`, creating the directory first
 * when it is missing. The file starts with a one-line header comment.
 */
export function writeCalibration(repoRoot: string, data: CalibrationData): void {
  const targetDir = path.join(repoRoot, ".ai", "deepreview");
  const dirMissing = !existsSync(targetDir);
  if (dirMissing) {
    mkdirSync(targetDir, { recursive: true });
  }

  const targetFile = path.join(targetDir, "calibration.yml");
  const serialized = dumpYaml(data, { lineWidth: 120, noRefs: true });
  const header = "# Auto-maintained by deepreview. User-editable.\n";
  writeFileSync(targetFile, header + serialized);
}
|
|
195
|
+
|
|
196
|
+
/**
 * Produce the id for a new calibration entry.
 *
 * Looks at every existing id of the form "cal-NNN", takes the largest number
 * found (0 when there is none), and returns "cal-" followed by that number
 * plus one, zero-padded to at least three digits. Ids in any other format
 * are ignored.
 */
export function nextId(existing: CalibrationEntry[]): string {
  const idPattern = /^cal-(\d+)$/u;
  const highest = existing.reduce((best, { id }) => {
    const found = idPattern.exec(id);
    if (!found) {
      return best;
    }
    const value = Number.parseInt(found[1], 10);
    return value > best ? value : best;
  }, 0);
  return `cal-${String(highest + 1).padStart(3, "0")}`;
}
|
package/src/project-context.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import { existsSync, readFileSync } from "node:fs";
|
|
9
9
|
import path from "node:path";
|
|
10
10
|
import { load as loadYaml } from "js-yaml";
|
|
11
|
+
import type { Severity } from "./calibration";
|
|
11
12
|
|
|
12
13
|
export interface ProjectMetadata {
|
|
13
14
|
/** Semantic version (e.g., "0.1.0", "3.2.1") */
|
|
@@ -28,6 +29,20 @@ export interface DeepReviewConfig {
|
|
|
28
29
|
threatModel?: "localhost-only" | "internal-network" | "public-facing" | "library";
|
|
29
30
|
/** Additional context hints for reviewers */
|
|
30
31
|
context?: string;
|
|
32
|
+
/** Shared calibration entries and settings (opt-in team calibration) */
|
|
33
|
+
calibration?: {
|
|
34
|
+
settings?: { expiryDays?: number };
|
|
35
|
+
entries?: Array<{
|
|
36
|
+
id: string;
|
|
37
|
+
pattern: string;
|
|
38
|
+
context: string;
|
|
39
|
+
originalSeverity: Severity;
|
|
40
|
+
adjustedSeverity: Severity;
|
|
41
|
+
observedCount: number;
|
|
42
|
+
lastConfirmed: string;
|
|
43
|
+
createdAt: string;
|
|
44
|
+
}>;
|
|
45
|
+
};
|
|
31
46
|
}
|
|
32
47
|
|
|
33
48
|
interface PackageJson {
|