@mechanai/deepreview 2.1.5 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/agents/deepreview-applier.md +25 -1
- package/.opencode/agents/deepreview-architecture.md +5 -0
- package/.opencode/agents/deepreview-compatibility.md +4 -0
- package/.opencode/agents/deepreview-correctness.md +5 -0
- package/.opencode/agents/deepreview-docs.md +7 -1
- package/.opencode/agents/deepreview-planner.md +6 -0
- package/.opencode/agents/deepreview-review-formatter.md +21 -5
- package/.opencode/agents/deepreview-security.md +5 -0
- package/.opencode/agents/deepreview-validator.md +22 -10
- package/.opencode/commands/deepreview-loop.md +67 -18
- package/.opencode/commands/deepreview-spec-loop.md +2 -2
- package/README.md +17 -0
- package/package.json +1 -1
- package/src/diff-classifier.ts +11 -1
- package/src/parse-threads.test.ts +84 -15
- package/src/parse-threads.ts +22 -4
- package/src/post-review.test.ts +31 -1
- package/src/post-review.ts +47 -71
- package/src/review-api.ts +8 -0
- package/src/review-helpers.ts +66 -0
|
@@ -6,6 +6,10 @@ permission:
|
|
|
6
6
|
edit: allow
|
|
7
7
|
bash:
|
|
8
8
|
"git diff*": allow
|
|
9
|
+
"mise run fmt*": allow
|
|
10
|
+
"mise run lint*": allow
|
|
11
|
+
"mise run check*": allow
|
|
12
|
+
"mise run test*": allow
|
|
9
13
|
"*": deny
|
|
10
14
|
---
|
|
11
15
|
|
|
@@ -21,7 +25,7 @@ For each fix in the plan, in the order specified by the "Order of Operations" se
|
|
|
21
25
|
|
|
22
26
|
1. Read the current file at the referenced location
|
|
23
27
|
2. Apply the code change exactly as specified in the plan
|
|
24
|
-
3. **Globalize check:** After applying, check whether other files _listed in input.txt or the plan_ have the same pattern. If so, apply the equivalent fix there too. Do NOT search the broader codebase. Common cases:
|
|
28
|
+
3. **Globalize check:** After applying, check whether other files _listed in input.txt or the plan_ have the same pattern. If so, apply the equivalent fix there too. Do NOT search the broader codebase. To identify "listed files": for diff inputs, use files from `diff --git a/... b/...` headers; for concatenated file inputs, use files from `=== filename ===` headers. Common cases:
|
|
25
29
|
- A loop command fix that applies to the other loop command (code-loop ↔ spec-loop)
|
|
26
30
|
- A prompt/contract change affecting multiple agent files
|
|
27
31
|
- A variable rename or policy change referenced in multiple files
|
|
@@ -30,6 +34,24 @@ For each fix in the plan, in the order specified by the "Order of Operations" se
|
|
|
30
34
|
|
|
31
35
|
If a fix cannot be applied (file doesn't exist, code doesn't match what was expected), skip it and note the failure.
|
|
32
36
|
|
|
37
|
+
## Scope rules
|
|
38
|
+
|
|
39
|
+
- Apply ONLY what the plan specifies. Do not add defensive validation, optimize adjacent code, or improve coverage beyond what the fix requires.
|
|
40
|
+
- If the plan's code change seems incomplete or wrong, apply it anyway and note the concern — do not improvise a "better" fix.
|
|
41
|
+
|
|
42
|
+
## Verification (after all fixes are applied)
|
|
43
|
+
|
|
44
|
+
After applying all fixes, run verification if `mise.toml` exists in the project root:
|
|
45
|
+
|
|
46
|
+
1. Run `mise run fmt` (auto-fix formatting — this is expected to modify files)
|
|
47
|
+
2. Run `mise run lint` or `mise run check` (whichever exists)
|
|
48
|
+
3. Run `mise run test`
|
|
49
|
+
|
|
50
|
+
If lint/check/test fails:
|
|
51
|
+
|
|
52
|
+
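- Include the error output in your response, inside the `FAILED:` and `VERIFICATION:` lines of the response contract (do not add free-form text)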
|
|
53
|
+
- Mark the relevant fix as FAILED with the error
|
|
54
|
+
|
|
33
55
|
## Response contract
|
|
34
56
|
|
|
35
57
|
Your ONLY response must be a list of files modified, one per line, in this format:
|
|
@@ -37,6 +59,8 @@ Your ONLY response must be a list of files modified, one per line, in this forma
|
|
|
37
59
|
```
|
|
38
60
|
APPLIED: path/to/file.ts — [one-line description of change]
|
|
39
61
|
SKIPPED: path/to/other.ts — [reason it couldn't be applied]
|
|
62
|
+
FAILED: path/to/broken.ts — [lint/test error message]
|
|
63
|
+
VERIFICATION: [PASS | FAIL — summary of fmt/lint/test results]
|
|
40
64
|
```
|
|
41
65
|
|
|
42
66
|
Do not include any other text.
|
|
@@ -36,6 +36,11 @@ Your prompt may also begin with framing directives (e.g., novelty-seeking instru
|
|
|
36
36
|
|
|
37
37
|
Use `git log` on changed files to understand the evolution of the code.
|
|
38
38
|
|
|
39
|
+
## Scope constraints
|
|
40
|
+
|
|
41
|
+
- **Only flag issues attributable to the diff under review.** Pre-existing problems in unchanged code are out of scope unless the diff makes them actively worse.
|
|
42
|
+
- Focus on structural and design issues, not cosmetic ones.
|
|
43
|
+
|
|
39
44
|
## Output format
|
|
40
45
|
|
|
41
46
|
Write your review to the output path provided. Use this format for each finding:
|
|
@@ -38,6 +38,10 @@ Your prompt may also begin with framing directives (e.g., novelty-seeking instru
|
|
|
38
38
|
|
|
39
39
|
Use `git log` and `git show` to check if removed/changed items had external consumers.
|
|
40
40
|
|
|
41
|
+
## Scope constraints
|
|
42
|
+
|
|
43
|
+
- **Only flag issues attributable to the diff under review.** Pre-existing compatibility concerns in unchanged code are out of scope unless the diff makes them actively worse.
|
|
44
|
+
|
|
41
45
|
## Output format
|
|
42
46
|
|
|
43
47
|
Write your review to the output path provided. Use this format for each finding:
|
|
@@ -38,6 +38,11 @@ Your prompt may also begin with framing directives (e.g., novelty-seeking instru
|
|
|
38
38
|
|
|
39
39
|
Use `git blame` and `git log` on changed files to understand intent when unclear.
|
|
40
40
|
|
|
41
|
+
## Scope constraints
|
|
42
|
+
|
|
43
|
+
- **Only flag issues attributable to the diff under review.** Pre-existing bugs in unchanged code are out of scope unless the diff makes them actively worse.
|
|
44
|
+
- Focus on correctness of the new/changed code, not unrelated pre-existing issues.
|
|
45
|
+
|
|
41
46
|
## Output format
|
|
42
47
|
|
|
43
48
|
Write your review to the output path provided. Use this format for each finding:
|
|
@@ -49,10 +49,16 @@ Write your review to the output path provided. Use this format for each finding:
|
|
|
49
49
|
|
|
50
50
|
Severity guide:
|
|
51
51
|
|
|
52
|
-
- **critical:** Doc/comment claims something false
|
|
52
|
+
- **critical:** Doc/comment claims something false that would cause an implementer to build the wrong thing or misuse an API. Stale wording that is obviously outdated (and thus unlikely to mislead) is NOT critical.
|
|
53
53
|
- **warning:** Duplicate or stale content that wastes reader attention
|
|
54
54
|
- **suggestion:** Verbose text that could be tightened
|
|
55
55
|
|
|
56
|
+
## Scope constraints
|
|
57
|
+
|
|
58
|
+
- **Only flag issues attributable to the diff under review.** Pre-existing documentation problems in unchanged code are out of scope unless the diff makes them actively worse.
|
|
59
|
+
- **ADRs (Architecture Decision Records) are historical documents.** Do not flag them for being "stale" — they record the decision at the time it was made. Only flag ADRs if the diff explicitly modifies them and introduces inconsistencies.
|
|
60
|
+
- **Test code cosmetics** (test function names, test descriptions) are suggestions at most, never warnings or critical.
|
|
61
|
+
|
|
56
62
|
If you find no issues, write: "No documentation issues found."
|
|
57
63
|
|
|
58
64
|
Be concise. No preamble or filler. Each finding should be actionable in 3-5 lines. If you find no issues in a category, say so in one line.
|
|
@@ -23,6 +23,12 @@ You will receive a path to a synthesis file. Read it.
|
|
|
23
23
|
2. For each finding, read ONLY the specific function or block referenced (use the Read tool with offset/limit to read ~50 lines around the referenced line — do NOT read entire files)
|
|
24
24
|
3. Write exact code changes for each fix
|
|
25
25
|
|
|
26
|
+
## Quality rules
|
|
27
|
+
|
|
28
|
+
- **One clean solution per fix.** Do not include your reasoning process, rejected approaches, or self-corrections in the output. If you are unsure which approach is best, pick the simplest one and add a one-line "Alternative:" note.
|
|
29
|
+
- **Stay within scope.** Only fix what the synthesis identifies. Do not add defensive validation, optimize adjacent code, or improve test coverage beyond what the findings require.
|
|
30
|
+
- **Concrete, not aspirational.** Every code change must be copy-pasteable. No pseudocode, no "something like this", no TODOs.
|
|
31
|
+
|
|
26
32
|
## Output format
|
|
27
33
|
|
|
28
34
|
Write your implementation plan to the output path provided. Use this structure:
|
|
@@ -22,18 +22,34 @@ Read both files.
|
|
|
22
22
|
## Process
|
|
23
23
|
|
|
24
24
|
1. Read the synthesis and identify every individual finding (each bullet or paragraph that describes a distinct issue)
|
|
25
|
-
2.
|
|
25
|
+
2. If the synthesis contains an "Overall Assessment" section, emit it as the **first document** with frontmatter `summary: true` (no `path` or `line`). The body should be the assessment text, lightly edited for brevity.
|
|
26
|
+
3. For each finding, determine:
|
|
26
27
|
- `path`: the file path (relative to repo root) the finding refers to
|
|
27
28
|
- `line`: the specific line number (on the new side of the diff). If the synthesis gives a range, use the end line.
|
|
28
29
|
- `startLine`: if the finding spans multiple lines, use the start of the range. Omit if single-line.
|
|
29
|
-
|
|
30
|
+
4. Read `input.txt` (the diff) to:
|
|
30
31
|
- Verify line references are correct
|
|
31
32
|
- Generate ` ```suggestion ` blocks where a concrete fix is obvious and fits within the diff
|
|
32
|
-
|
|
33
|
+
5. Write each finding as a document in the output file
|
|
33
34
|
|
|
34
35
|
## Output format
|
|
35
36
|
|
|
36
|
-
Write to the output path provided.
|
|
37
|
+
Write to the output path provided. The file has two parts:
|
|
38
|
+
|
|
39
|
+
### 1. Summary document (first, when synthesis has an Overall Assessment)
|
|
40
|
+
|
|
41
|
+
If the synthesis contains an "Overall Assessment" section, the first document must have `summary: true` in its frontmatter. Its body is a 2-3 sentence overall assessment. This appears as the review body on GitHub. Omit this document only if the synthesis has no assessment section.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
---
|
|
45
|
+
summary: true
|
|
46
|
+
---
|
|
47
|
+
<2-3 sentence overall assessment from the synthesis>
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 2. Finding documents (one per finding)
|
|
51
|
+
|
|
52
|
+
Each finding follows the summary, separated by `---`:
|
|
37
53
|
|
|
38
54
|
```
|
|
39
55
|
---
|
|
@@ -49,7 +65,7 @@ line: <line number>
|
|
|
49
65
|
- One finding per document. Never bundle multiple issues.
|
|
50
66
|
- No stats, severity counts, or framing ("3 critical issues found")
|
|
51
67
|
- No references to local file paths, session directories, AI tooling, or the deepreview pipeline
|
|
52
|
-
- Use permalinks for code references: `https://github.com/OWNER/REPO/blob
|
|
68
|
+
- Use permalinks for code references: `https://github.com/OWNER/REPO/blob/<PR_HEAD_SHA>/<path>#L<line>`
|
|
53
69
|
- Get OWNER/REPO from the diff header or from `input.txt` context
|
|
54
70
|
- Use ` ```suggestion ` blocks where a concrete fix is obvious
|
|
55
71
|
- American English. Succinct. No filler.
|
|
@@ -36,6 +36,11 @@ Your prompt may also begin with framing directives (e.g., novelty-seeking instru
|
|
|
36
36
|
|
|
37
37
|
Use `git blame` and `git log` on changed files to understand intent when unclear.
|
|
38
38
|
|
|
39
|
+
## Scope constraints
|
|
40
|
+
|
|
41
|
+
- **Only flag issues attributable to the diff under review.** Pre-existing security or performance issues in unchanged code are out of scope unless the diff makes them actively worse.
|
|
42
|
+
- **Test code patterns** (test fixtures, test helpers, deliberate test doubles) should only be flagged if they could leak into production or mask real bugs. `std::mem::forget` in a test to keep a tempdir alive is not a security concern.
|
|
43
|
+
|
|
39
44
|
## Output format
|
|
40
45
|
|
|
41
46
|
Write your review to the output path provided. Use this format for each finding:
|
|
@@ -14,26 +14,38 @@ permission:
|
|
|
14
14
|
"*": deny
|
|
15
15
|
---
|
|
16
16
|
|
|
17
|
-
You are a skeptical senior engineer. Your job is to cross-validate code review findings by checking every claim against the actual source code. You are not here to agree — you are here to disprove.
|
|
17
|
+
You are a skeptical senior engineer. Your job is to cross-validate code review findings by checking every claim against the actual source code. You are not here to agree — you are here to disprove. Your default stance is rejection; a finding must earn its place with verifiable evidence.
|
|
18
18
|
|
|
19
19
|
## Input
|
|
20
20
|
|
|
21
|
-
You will receive paths to
|
|
21
|
+
You will receive paths to review files and a perspective label. Read all review files.
|
|
22
22
|
|
|
23
23
|
## Process
|
|
24
24
|
|
|
25
|
-
For each finding in all
|
|
25
|
+
For each finding in all reviews:
|
|
26
26
|
|
|
27
27
|
1. Read the source file and line referenced in the finding
|
|
28
|
-
2.
|
|
29
|
-
3.
|
|
30
|
-
4.
|
|
31
|
-
5.
|
|
32
|
-
|
|
28
|
+
2. **Verify the reference exists.** If the finding claims something exists at a specific file:line (a function, a reference, a pattern), confirm that thing actually exists at that location. If it doesn't, classify as disproved.
|
|
29
|
+
3. Determine if the claimed issue actually exists in the code
|
|
30
|
+
4. If the finding makes claims about external tool behavior (CLI flags, API parameters, library methods), **verify those claims**. Run `--help`, check man pages, or use WebFetch to check documentation. If the claimed behavior doesn't exist, classify as disproved.
|
|
31
|
+
5. Check if the issue is already handled elsewhere (error handling, validation, guards)
|
|
32
|
+
6. **Assess severity proportionality.** If the finding's severity is more than one level above what the evidence supports (e.g., a stale comment rated "critical" when it's clearly a "suggestion"), downgrade it or classify as trivial.
|
|
33
|
+
7. Classify the finding:
|
|
34
|
+
- **confirmed** (high confidence): you verified the issue exists in the code and the severity is proportionate
|
|
33
35
|
- **plausible** (medium confidence): the issue might exist but you cannot fully verify
|
|
34
|
-
- **
|
|
36
|
+
- **trivial**: the issue technically exists but is not worth fixing — severity is inflated, the fix is cosmetic, or the finding is a style preference rather than an objective defect
|
|
37
|
+
- **disproved** (low confidence): the code already handles this, the claim is wrong, the referenced location doesn't contain what's claimed, or the finding assumes external tool/API behavior that doesn't exist
|
|
35
38
|
|
|
36
|
-
Discard all
|
|
39
|
+
Discard all **disproved** and **trivial** findings entirely.
|
|
40
|
+
|
|
41
|
+
## Rejection criteria (discard the finding if ANY apply)
|
|
42
|
+
|
|
43
|
+
- The referenced file:line does not contain what the finding claims
|
|
44
|
+
- The finding flags a pre-existing issue in unchanged code that the diff does not make worse
|
|
45
|
+
- The severity is inflated by more than one level (e.g., a typo in a comment rated "critical")
|
|
46
|
+
- The finding is a design opinion or stylistic preference, not an objective defect
|
|
47
|
+
- The finding duplicates another reviewer's finding on the same file:line (note the overlap, keep only one)
|
|
48
|
+
- The finding references a historical document (ADR, changelog) as "stale" when the document is intentionally historical
|
|
37
49
|
|
|
38
50
|
## Output format
|
|
39
51
|
|
|
@@ -8,13 +8,14 @@ STEP 1: DETERMINE INPUT MODE
|
|
|
8
8
|
Parse "$ARGUMENTS" the same way as /deepreview:
|
|
9
9
|
|
|
10
10
|
- If it starts with `--context <path>`, extract CONTEXT_FILE=<path> and remove it from $ARGUMENTS before parsing the rest.
|
|
11
|
-
- Validate CONTEXT_FILE
|
|
11
|
+
- Validate `CONTEXT_FILE`: it must be a relative path (no leading `/`), must not contain `..`, must exist on disk, must be a regular file (not a directory or a symlink pointing outside the project), and must be under 50KB. If validation fails, tell the user the error and STOP.
|
|
12
|
+
_(Canonical source for `CONTEXT_FILE` validation rules. Keep `deepreview.md`, `deepreview-spec.md`, and `deepreview-spec-loop.md` in sync.)_
|
|
12
13
|
- If it is a number → MODE=pr, TARGET="$ARGUMENTS"
|
|
13
14
|
- If it is a file path or multiple file paths → MODE=files, TARGET="$ARGUMENTS"
|
|
14
15
|
- If it is empty → MODE=branch, TARGET=""
|
|
15
16
|
|
|
16
17
|
Set ITERATION=1
|
|
17
|
-
Set PRIOR_CONTEXT="" (empty — built up across iterations)
|
|
18
|
+
Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
|
|
18
19
|
Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
|
|
19
20
|
|
|
20
21
|
If CONTEXT_FILE exists, read its contents and set PRIOR_CONTEXT to:
|
|
@@ -43,9 +44,20 @@ Dispatch the applier automatically — do NOT ask the user for permission.
|
|
|
43
44
|
Use the Task tool with subagent_type="deepreview-applier":
|
|
44
45
|
"Read the implementation plan at $SESSION_DIR/implementation-plan.md. Apply the fixes."
|
|
45
46
|
|
|
46
|
-
Wait for the applier to return.
|
|
47
|
+
Wait for the applier to return. Parse the applier's response for VERIFICATION status.
|
|
47
48
|
|
|
48
|
-
STEP
|
|
49
|
+
STEP 4b: HANDLE VERIFICATION RESULTS
|
|
50
|
+
If the applier reports VERIFICATION: FAIL:
|
|
51
|
+
|
|
52
|
+
- Show the user the error summary from the applier's response
|
|
53
|
+
- Ask: "Applied fixes failed verification (lint/test). Options: revert and skip failing fix, continue anyway, or stop?"
|
|
54
|
+
- If revert: run `git checkout -- .` to undo all changes from this iteration, note which fix failed, add it to a SKIP_LIST, and re-run the planner+applier without that fix.
|
|
55
|
+
- If continue: proceed to STEP 5 (the next iteration's reviewers will likely catch the introduced error).
|
|
56
|
+
- If stop: STOP.
|
|
57
|
+
|
|
58
|
+
If the applier reports VERIFICATION: PASS (or no verification was possible): proceed to STEP 5.
|
|
59
|
+
|
|
60
|
+
STEP 5: INCREMENT AND RE-REVIEW
|
|
49
61
|
Set ITERATION = ITERATION + 1
|
|
50
62
|
|
|
51
63
|
If ITERATION > 5:
|
|
@@ -67,36 +79,47 @@ Prepare fresh input:
|
|
|
67
79
|
|
|
68
80
|
Check if input.txt is empty. If empty, tell user "Nothing to review — all changes resolved." and STOP.
|
|
69
81
|
|
|
70
|
-
STEP 5a:
|
|
82
|
+
STEP 5a: DIFF SIZE DIVERGENCE CHECK
|
|
83
|
+
Compare the size of the new input.txt to the previous iteration's input.txt. Measure both in the same unit — prefer line count, since the warning below reports lines.
|
|
84
|
+
If the new input is more than 50% larger than the previous iteration's input:
|
|
85
|
+
|
|
86
|
+
- Tell the user: "Divergence warning: diff grew from ~N to ~M lines (X% increase). The applier may be adding more code than it's fixing."
|
|
87
|
+
- Ask: "Continue with the larger diff, or revert last iteration's changes?"
|
|
88
|
+
- If revert: run `git checkout -- .`, then STOP.
|
|
89
|
+
- If continue: proceed.
|
|
90
|
+
|
|
91
|
+
STEP 5b: BUILD PRIOR CONTEXT
|
|
71
92
|
Accumulate findings from ALL previous iterations into PRIOR_CONTEXT so no finding is re-reported.
|
|
72
93
|
|
|
73
|
-
To build this, dispatch a helper task that reads ALL previous syntheses:
|
|
94
|
+
To build this, dispatch a helper task that reads ALL previous syntheses AND implementation plans:
|
|
74
95
|
NOTE: Interpolate the actual directory paths from ALL_SESSION_DIRS into this task string — the subagent cannot access your variables.
|
|
75
96
|
Task — Use the Task tool with subagent_type="general":
|
|
76
|
-
"Read the synthesis files from these directories: [LIST EACH PATH FROM ALL_SESSION_DIRS EXCLUDING CURRENT]. If any
|
|
97
|
+
"Read the synthesis files AND implementation plan files from these directories: [LIST EACH PATH FROM ALL_SESSION_DIRS EXCLUDING CURRENT]. If any file does not exist, skip it. Extract:
|
|
77
98
|
|
|
78
99
|
## Prior Findings (already reported — do not re-report or verify)
|
|
79
100
|
|
|
80
101
|
- [Short Issue Title] ([category]) — [file:line]
|
|
81
102
|
|
|
103
|
+
## Applied Fixes (changes made by previous iterations — new bugs here are regressions)
|
|
104
|
+
|
|
105
|
+
- [Fix title from implementation plan] — [file:line] (applied in iter N)
|
|
106
|
+
|
|
82
107
|
## Covered Regions (already examined — prioritize elsewhere)
|
|
83
108
|
|
|
84
109
|
- [file:line-range] (pad each finding's file:line by 20 lines in each direction)
|
|
85
110
|
|
|
86
|
-
Deduplicate findings that appear in multiple syntheses. Return ONLY these
|
|
111
|
+
Deduplicate findings that appear in multiple syntheses. Return ONLY these three sections, nothing else."
|
|
87
112
|
|
|
88
113
|
Set PRIOR_CONTEXT to the returned text. Validate that it contains "## Prior Findings" — if not, warn the user ("Helper returned malformed prior context — proceeding without deduplication") and set PRIOR_CONTEXT="". If CONTEXT_FILE exists, prepend:
|
|
89
114
|
"## Design Decisions (intentional — do not flag)\nThe following are deliberate design choices. Do NOT flag these as issues or suggest alternatives.\n`\n" + contents of CONTEXT_FILE + "\n`\n\n"
|
|
90
115
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
The key difference: iteration 2+ skips cross-validation. This prevents validators from filtering out new issues introduced by fixes.
|
|
116
|
+
STEP 5c: RUN REVIEW WITH CROSS-VALIDATION
|
|
94
117
|
|
|
95
118
|
Stage 1 — DISPATCH 5 PARALLEL REVIEWERS:
|
|
96
119
|
Each reviewer prompt MUST include PRIOR_CONTEXT and the novelty-seeking framing below.
|
|
97
120
|
|
|
98
121
|
The REVIEWER_PREAMBLE for all iter2+ reviewers is:
|
|
99
|
-
"Your goal is to find issues that PREVIOUS reviewers missed. Do NOT re-report, verify, or comment on prior findings.
|
|
122
|
+
"Your goal is to find issues that PREVIOUS reviewers missed. Do NOT re-report, verify, or comment on prior findings. If you find a bug in code listed under 'Applied Fixes', flag it as a regression.
|
|
100
123
|
|
|
101
124
|
$PRIOR_CONTEXT
|
|
102
125
|
|
|
@@ -129,14 +152,39 @@ Read the content at $SESSION_DIR/input.txt. Write your review to $SESSION_DIR/re
|
|
|
129
152
|
|
|
130
153
|
Wait for all 5. Record which succeeded.
|
|
131
154
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
155
|
+
STEP 5d: VERIFY REVIEWER OUTPUT
|
|
156
|
+
Check how many review files were actually written. Run: `ls $SESSION_DIR/review-*.md 2>/dev/null | wc -l`
|
|
157
|
+
|
|
158
|
+
- If 0 files exist: Tell the user "All reviewers failed to produce output. This usually means the diff is too large for subagent context windows or there was an infrastructure failure." STOP.
|
|
159
|
+
- If 1-2 files exist: Warn the user "Only N/5 reviewers produced output. Proceeding with partial results." Continue with what exists.
|
|
160
|
+
- If 3+ files exist: Proceed normally.
|
|
161
|
+
|
|
162
|
+
Stage 2 — DISPATCH 5 PARALLEL VALIDATORS (cross-validation):
|
|
163
|
+
Task 6 — Use the Task tool with subagent_type="deepreview-validator":
|
|
164
|
+
"Your perspective: correctness. Read all review files at: $SESSION_DIR/review-correctness.md, $SESSION_DIR/review-security.md, $SESSION_DIR/review-architecture.md, $SESSION_DIR/review-docs.md, $SESSION_DIR/review-compatibility.md. Also read the original input at $SESSION_DIR/input.txt for context. Write your validated review to $SESSION_DIR/validated-correctness.md."
|
|
165
|
+
|
|
166
|
+
Task 7 — Use the Task tool with subagent_type="deepreview-validator":
|
|
167
|
+
"Your perspective: security. Read all review files at: $SESSION_DIR/review-correctness.md, $SESSION_DIR/review-security.md, $SESSION_DIR/review-architecture.md, $SESSION_DIR/review-docs.md, $SESSION_DIR/review-compatibility.md. Also read the original input at $SESSION_DIR/input.txt for context. Write your validated review to $SESSION_DIR/validated-security.md."
|
|
168
|
+
|
|
169
|
+
Task 8 — Use the Task tool with subagent_type="deepreview-validator":
|
|
170
|
+
"Your perspective: architecture. Read all review files at: $SESSION_DIR/review-correctness.md, $SESSION_DIR/review-security.md, $SESSION_DIR/review-architecture.md, $SESSION_DIR/review-docs.md, $SESSION_DIR/review-compatibility.md. Also read the original input at $SESSION_DIR/input.txt for context. Write your validated review to $SESSION_DIR/validated-architecture.md."
|
|
171
|
+
|
|
172
|
+
Task 9 — Use the Task tool with subagent_type="deepreview-validator":
|
|
173
|
+
"Your perspective: docs. Read all review files at: $SESSION_DIR/review-correctness.md, $SESSION_DIR/review-security.md, $SESSION_DIR/review-architecture.md, $SESSION_DIR/review-docs.md, $SESSION_DIR/review-compatibility.md. Also read the original input at $SESSION_DIR/input.txt for context. Write your validated review to $SESSION_DIR/validated-docs.md."
|
|
174
|
+
|
|
175
|
+
Task 10 — Use the Task tool with subagent_type="deepreview-validator":
|
|
176
|
+
"Your perspective: compatibility. Read all review files at: $SESSION_DIR/review-correctness.md, $SESSION_DIR/review-security.md, $SESSION_DIR/review-architecture.md, $SESSION_DIR/review-docs.md, $SESSION_DIR/review-compatibility.md. Also read the original input at $SESSION_DIR/input.txt for context. Write your validated review to $SESSION_DIR/validated-compatibility.md."
|
|
177
|
+
|
|
178
|
+
Wait for all 5 to return.
|
|
179
|
+
|
|
180
|
+
Stage 3 — DISPATCH SYNTHESIZER:
|
|
181
|
+
Task 11 — Use the Task tool with subagent_type="deepreview-synthesizer":
|
|
182
|
+
"Read the validated reviews at: $SESSION_DIR/validated-correctness.md, $SESSION_DIR/validated-security.md, $SESSION_DIR/validated-architecture.md, $SESSION_DIR/validated-docs.md, $SESSION_DIR/validated-compatibility.md (skip any that don't exist). Write the synthesis to $SESSION_DIR/synthesis.md."
|
|
135
183
|
|
|
136
184
|
Record the stats line.
|
|
137
185
|
|
|
138
186
|
Stage 4 — DISPATCH PLANNER:
|
|
139
|
-
Task
|
|
187
|
+
Task 12 — Use the Task tool with subagent_type="deepreview-planner":
|
|
140
188
|
"Read the synthesis at $SESSION_DIR/synthesis.md. Write the implementation plan to $SESSION_DIR/implementation-plan.md."
|
|
141
189
|
|
|
142
190
|
Record the summary line.
|
|
@@ -159,8 +207,9 @@ IMPORTANT RULES:
|
|
|
159
207
|
- Use ONLY the file paths and stats/summary lines returned by subagents.
|
|
160
208
|
- Apply ALL findings (critical, warning, AND suggestion) — the goal is a clean review.
|
|
161
209
|
- Do NOT ask the user for permission to apply fixes. Apply automatically.
|
|
162
|
-
- DO ask the user if iteration limit is hit
|
|
163
|
-
- Iteration 2+ MUST
|
|
210
|
+
- DO ask the user if: iteration limit is hit, deadlock is detected, verification fails, or diff size diverges.
|
|
211
|
+
- Iteration 2+ MUST include cross-validation, MUST include PRIOR_CONTEXT, and MUST use novelty-seeking framing.
|
|
164
212
|
- Iteration 2+ MUST NOT tell reviewers to "verify" or "check status of" prior findings.
|
|
165
213
|
- Each iteration uses a NEW session directory — never reuse a previous one.
|
|
166
214
|
- If --context file is provided, include its contents under "Design Decisions" in PRIOR_CONTEXT for ALL iterations (including iter1).
|
|
215
|
+
- If no reviewer produces an output file, STOP immediately — do not continue to validation or synthesis.
|
|
@@ -11,7 +11,7 @@ STEP 1: DETERMINE INPUT
|
|
|
11
11
|
- If remaining "$ARGUMENTS" is empty, tell the user "Usage: /deepreview-spec-loop [--context <file>] <file1> [file2 ...]" and STOP.
|
|
12
12
|
- Set FILES="$ARGUMENTS"
|
|
13
13
|
- Set ITERATION=1
|
|
14
|
-
- Set PRIOR_CONTEXT="" (empty — built up across iterations)
|
|
14
|
+
- Set PRIOR_CONTEXT="" (empty — built up across iterations; holds both design context and prior findings)
|
|
15
15
|
- Set ALL_SESSION_DIRS=[] (list of all session directories used, in order)
|
|
16
16
|
- If CONTEXT_FILE exists, set PRIOR_CONTEXT="## Design Decisions (intentional — do not flag)\nThe following are deliberate design choices. Do NOT flag these as issues or suggest alternatives.\n`\n" + contents of CONTEXT_FILE + "\n`\n\n"
|
|
17
17
|
|
|
@@ -81,7 +81,7 @@ Task — Use the Task tool with subagent_type="general":
|
|
|
81
81
|
|
|
82
82
|
Deduplicate findings that appear in multiple syntheses. Return ONLY these two sections, nothing else."
|
|
83
83
|
|
|
84
|
-
Set PRIOR_CONTEXT to the returned text. If CONTEXT_FILE exists, prepend:
|
|
84
|
+
Set PRIOR_CONTEXT to the returned text. Validate that it contains "## Prior Findings" — if not, warn the user ("Helper returned malformed prior context — proceeding without deduplication") and set PRIOR_CONTEXT="". If CONTEXT_FILE exists, prepend:
|
|
85
85
|
"## Design Decisions (intentional — do not flag)\nThe following are deliberate design choices. Do NOT flag these as issues or suggest alternatives.\n`\n" + contents of CONTEXT_FILE + "\n`\n\n"
|
|
86
86
|
|
|
87
87
|
The REVIEWER_PREAMBLE for all iter2+ reviewers is:
|
package/README.md
CHANGED
|
@@ -83,6 +83,23 @@ its own context, keeping token usage minimal.
|
|
|
83
83
|
- `git`
|
|
84
84
|
- `gh` CLI (only for PR commands)
|
|
85
85
|
|
|
86
|
+
## Upgrade
|
|
87
|
+
|
|
88
|
+
OpenCode caches plugins on first install and does not automatically check for newer versions.
|
|
89
|
+
To upgrade:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
rm -rf ~/.cache/opencode/packages/*deepreview*/
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Then restart OpenCode. It will re-fetch the latest version.
|
|
96
|
+
|
|
97
|
+
If you installed with `--local`, also re-run the setup script to update symlinks:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
bunx @mechanai/deepreview@latest/setup --local
|
|
101
|
+
```
|
|
102
|
+
|
|
86
103
|
> [!NOTE]
|
|
87
104
|
> If upgrading from the old `npx @anthropic/deepreview install` workflow, remove
|
|
88
105
|
> the old copied files first (`rm ~/.config/opencode/agents/deepreview*
|
package/package.json
CHANGED
package/src/diff-classifier.ts
CHANGED
|
@@ -22,8 +22,18 @@ interface FileHunks {
|
|
|
22
22
|
* Tier 2: file-level (file in diff but line not in any hunk)
|
|
23
23
|
* Tier 3: review body (file not in diff)
|
|
24
24
|
*/
|
|
25
|
+
// ~5MB limit, measured via String.length (UTF-16 code units, not bytes) — diffs larger than this are truncated before parsing
|
|
26
|
+
const MAX_DIFF_SIZE = 5 * 1024 * 1024;
|
|
27
|
+
|
|
25
28
|
export function classifyFindings(findings: Finding[], diffText: string): ClassifiedFinding[] {
|
|
26
|
-
|
|
29
|
+
let effectiveDiff = diffText;
|
|
30
|
+
if (diffText.length > MAX_DIFF_SIZE) {
|
|
31
|
+
console.warn(
|
|
32
|
+
`WARN: Diff size (${(diffText.length / 1024 / 1024).toFixed(1)}MB) exceeds ${MAX_DIFF_SIZE / 1024 / 1024}MB limit. Truncating — some findings may be demoted to tier 3.`,
|
|
33
|
+
);
|
|
34
|
+
effectiveDiff = diffText.slice(0, MAX_DIFF_SIZE);
|
|
35
|
+
}
|
|
36
|
+
const parsed = parseDiff(effectiveDiff);
|
|
27
37
|
const fileMap = buildFileMap(parsed);
|
|
28
38
|
|
|
29
39
|
return findings.map((finding) => {
|
|
@@ -2,7 +2,76 @@ import { describe, it } from "bun:test";
|
|
|
2
2
|
import assert from "node:assert/strict";
|
|
3
3
|
import { parseThreads } from "./parse-threads.ts";
|
|
4
4
|
|
|
5
|
-
describe("parseThreads", () => {
|
|
5
|
+
describe("parseThreads summary extraction", () => {
|
|
6
|
+
it("extracts a summary document marked with summary: true", () => {
|
|
7
|
+
const input = [
|
|
8
|
+
"---",
|
|
9
|
+
"summary: true",
|
|
10
|
+
"---",
|
|
11
|
+
"Overall this PR looks good with minor issues.",
|
|
12
|
+
"---",
|
|
13
|
+
"path: src/main.go",
|
|
14
|
+
"line: 10",
|
|
15
|
+
"---",
|
|
16
|
+
"Error not propagated.",
|
|
17
|
+
].join("\n");
|
|
18
|
+
|
|
19
|
+
const result = parseThreads(input);
|
|
20
|
+
assert.equal(result.summary, "Overall this PR looks good with minor issues.");
|
|
21
|
+
assert.equal(result.findings.length, 1);
|
|
22
|
+
assert.equal(result.findings[0].path, "src/main.go");
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
it("returns undefined summary when no summary document exists", () => {
|
|
26
|
+
const input = ["---", "path: src/main.go", "line: 10", "---", "Error not propagated."].join(
|
|
27
|
+
"\n",
|
|
28
|
+
);
|
|
29
|
+
|
|
30
|
+
const result = parseThreads(input);
|
|
31
|
+
assert.equal(result.summary, undefined);
|
|
32
|
+
assert.equal(result.findings.length, 1);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it("handles summary at end of file", () => {
|
|
36
|
+
const input = [
|
|
37
|
+
"---",
|
|
38
|
+
"path: src/main.go",
|
|
39
|
+
"line: 10",
|
|
40
|
+
"---",
|
|
41
|
+
"Finding body.",
|
|
42
|
+
"---",
|
|
43
|
+
"summary: true",
|
|
44
|
+
"---",
|
|
45
|
+
"Summary at the end.",
|
|
46
|
+
].join("\n");
|
|
47
|
+
|
|
48
|
+
const result = parseThreads(input);
|
|
49
|
+
assert.equal(result.summary, "Summary at the end.");
|
|
50
|
+
assert.equal(result.findings.length, 1);
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
describe("parseThreads summary edge cases", () => {
|
|
55
|
+
it("treats empty summary body as empty string", () => {
|
|
56
|
+
const input = [
|
|
57
|
+
"---",
|
|
58
|
+
"summary: true",
|
|
59
|
+
"---",
|
|
60
|
+
"",
|
|
61
|
+
"---",
|
|
62
|
+
"path: src/main.go",
|
|
63
|
+
"line: 10",
|
|
64
|
+
"---",
|
|
65
|
+
"Finding.",
|
|
66
|
+
].join("\n");
|
|
67
|
+
|
|
68
|
+
const result = parseThreads(input);
|
|
69
|
+
assert.equal(result.summary, "");
|
|
70
|
+
assert.equal(result.findings.length, 1);
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
describe("parseThreads findings", () => {
|
|
6
75
|
it("parses a single finding", () => {
|
|
7
76
|
const input = [
|
|
8
77
|
"---",
|
|
@@ -13,11 +82,11 @@ describe("parseThreads", () => {
|
|
|
13
82
|
].join("\n");
|
|
14
83
|
|
|
15
84
|
const result = parseThreads(input);
|
|
16
|
-
assert.equal(result.length, 1);
|
|
17
|
-
assert.equal(result[0].path, "pkg/server/handler.go");
|
|
18
|
-
assert.equal(result[0].line, 48);
|
|
19
|
-
assert.equal(result[0].startLine, undefined);
|
|
20
|
-
assert.equal(result[0].body.trim(), "The error is silently discarded.");
|
|
85
|
+
assert.equal(result.findings.length, 1);
|
|
86
|
+
assert.equal(result.findings[0].path, "pkg/server/handler.go");
|
|
87
|
+
assert.equal(result.findings[0].line, 48);
|
|
88
|
+
assert.equal(result.findings[0].startLine, undefined);
|
|
89
|
+
assert.equal(result.findings[0].body.trim(), "The error is silently discarded.");
|
|
21
90
|
});
|
|
22
91
|
|
|
23
92
|
it("parses multiple findings separated by ---", () => {
|
|
@@ -36,23 +105,23 @@ describe("parseThreads", () => {
|
|
|
36
105
|
].join("\n");
|
|
37
106
|
|
|
38
107
|
const result = parseThreads(input);
|
|
39
|
-
assert.equal(result.length, 2);
|
|
40
|
-
assert.equal(result[0].path, "a.go");
|
|
41
|
-
assert.equal(result[0].startLine, 10);
|
|
42
|
-
assert.equal(result[0].line, 15);
|
|
43
|
-
assert.equal(result[1].path, "b.go");
|
|
44
|
-
assert.equal(result[1].line, 3);
|
|
108
|
+
assert.equal(result.findings.length, 2);
|
|
109
|
+
assert.equal(result.findings[0].path, "a.go");
|
|
110
|
+
assert.equal(result.findings[0].startLine, 10);
|
|
111
|
+
assert.equal(result.findings[0].line, 15);
|
|
112
|
+
assert.equal(result.findings[1].path, "b.go");
|
|
113
|
+
assert.equal(result.findings[1].line, 3);
|
|
45
114
|
});
|
|
46
115
|
|
|
47
116
|
it("ignores startLine: 0 (treats as single-line)", () => {
|
|
48
117
|
const input = ["---", "path: x.go", "startLine: 0", "line: 5", "---", "Body."].join("\n");
|
|
49
118
|
|
|
50
119
|
const result = parseThreads(input);
|
|
51
|
-
assert.equal(result[0].startLine, undefined);
|
|
120
|
+
assert.equal(result.findings[0].startLine, undefined);
|
|
52
121
|
});
|
|
53
122
|
|
|
54
|
-
it("returns empty array for empty input", () => {
|
|
123
|
+
it("returns empty findings for empty input", () => {
|
|
55
124
|
const result = parseThreads("");
|
|
56
|
-
assert.equal(result.length, 0);
|
|
125
|
+
assert.equal(result.findings.length, 0);
|
|
57
126
|
});
|
|
58
127
|
});
|
package/src/parse-threads.ts
CHANGED
|
@@ -2,6 +2,12 @@ import matter from "gray-matter";
|
|
|
2
2
|
import yaml from "js-yaml";
|
|
3
3
|
import type { Finding } from "./diff-classifier.ts";
|
|
4
4
|
|
|
5
|
+
/** @internal Not part of the public API — subject to change without notice. */
|
|
6
|
+
export interface ParsedThreads {
|
|
7
|
+
findings: Finding[];
|
|
8
|
+
summary?: string;
|
|
9
|
+
}
|
|
10
|
+
|
|
5
11
|
/**
|
|
6
12
|
* gray-matter engine restricted to safe YAML parsing (no !!js/function etc.).
|
|
7
13
|
* FAILSAFE_SCHEMA returns all scalars as strings — numeric fields (line, startLine)
|
|
@@ -16,11 +22,13 @@ const safeYamlEngine = (s: string): Record<string, unknown> => {
|
|
|
16
22
|
const matterOptions = { engines: { yaml: safeYamlEngine } };
|
|
17
23
|
|
|
18
24
|
/**
|
|
19
|
-
* Parse a threads.md file into
|
|
20
|
-
* The file uses --- separators between
|
|
25
|
+
* Parse a threads.md file into findings and an optional summary.
|
|
26
|
+
* The file uses --- separators between documents, each with YAML frontmatter.
|
|
27
|
+
* A document with `summary: true` in its frontmatter is extracted as the review summary.
|
|
21
28
|
*/
|
|
22
|
-
function parseThreads(content: string): Finding[] {
|
|
29
|
+
function parseThreads(content: string): ParsedThreads {
|
|
23
30
|
const findings: Finding[] = [];
|
|
31
|
+
let summary: string | undefined;
|
|
24
32
|
const documents = splitDocuments(content);
|
|
25
33
|
|
|
26
34
|
for (const doc of documents) {
|
|
@@ -33,6 +41,16 @@ function parseThreads(content: string): Finding[] {
|
|
|
33
41
|
continue;
|
|
34
42
|
}
|
|
35
43
|
const data = parsed.data as Record<string, unknown>;
|
|
44
|
+
|
|
45
|
+
// Handle summary documents
|
|
46
|
+
if (String(data.summary).toLowerCase() === "true") {
|
|
47
|
+
if (summary !== undefined) {
|
|
48
|
+
console.warn("WARN: Multiple summary documents found — using the last one.");
|
|
49
|
+
}
|
|
50
|
+
summary = parsed.content.trim();
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
53
|
+
|
|
36
54
|
const path = typeof data.path === "string" ? data.path : undefined;
|
|
37
55
|
const lineNum = Number(data.line);
|
|
38
56
|
if (path === undefined || path === "" || !Number.isFinite(lineNum) || lineNum < 1) {
|
|
@@ -56,7 +74,7 @@ function parseThreads(content: string): Finding[] {
|
|
|
56
74
|
});
|
|
57
75
|
}
|
|
58
76
|
|
|
59
|
-
return findings;
|
|
77
|
+
return { findings, summary };
|
|
60
78
|
}
|
|
61
79
|
|
|
62
80
|
const YAML_KEY_RE = /^\w[\w\s]*:/u;
|
package/src/post-review.test.ts
CHANGED
|
@@ -2,6 +2,36 @@ import { describe, it } from "bun:test";
|
|
|
2
2
|
import assert from "node:assert/strict";
|
|
3
3
|
import { parseThreads } from "./parse-threads.ts";
|
|
4
4
|
import { classifyFindings } from "./diff-classifier.ts";
|
|
5
|
+
import { buildReviewBody } from "./review-helpers.ts";
|
|
6
|
+
|
|
7
|
+
describe("buildReviewBody", () => {
|
|
8
|
+
it("includes summary when provided and no tier3 findings", () => {
|
|
9
|
+
const body = buildReviewBody([], "owner", "repo", "a".repeat(40), "Overall looks good.");
|
|
10
|
+
assert.ok(body.includes("Overall looks good."));
|
|
11
|
+
assert.ok(body.includes("🤖 Review generated by AI"));
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it("includes both summary and tier3 findings", () => {
|
|
15
|
+
const tier3 = [{ path: "pkg/other.go", line: 5, body: "Unused import.", tier: 3 as const }];
|
|
16
|
+
const body = buildReviewBody(tier3, "owner", "repo", "a".repeat(40), "Has issues.");
|
|
17
|
+
assert.ok(body.includes("Has issues."));
|
|
18
|
+
assert.ok(body.includes("pkg/other.go"));
|
|
19
|
+
assert.ok(body.includes("Unused import."));
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("works without summary (backward compatible)", () => {
|
|
23
|
+
const tier3 = [{ path: "pkg/other.go", line: 5, body: "Unused import.", tier: 3 as const }];
|
|
24
|
+
const body = buildReviewBody(tier3, "owner", "repo", "a".repeat(40));
|
|
25
|
+
assert.ok(!body.includes("undefined"));
|
|
26
|
+
assert.ok(body.includes("pkg/other.go"));
|
|
27
|
+
assert.ok(body.includes("🤖 Review generated by AI"));
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("only shows trailer when no summary and no tier3 findings", () => {
|
|
31
|
+
const body = buildReviewBody([], "owner", "repo", "a".repeat(40));
|
|
32
|
+
assert.equal(body, "\n\n---\n🤖 Review generated by AI");
|
|
33
|
+
});
|
|
34
|
+
});
|
|
5
35
|
|
|
6
36
|
describe("integration: parse → classify", () => {
|
|
7
37
|
const threadsContent = [
|
|
@@ -38,7 +68,7 @@ describe("integration: parse → classify", () => {
|
|
|
38
68
|
].join("\n");
|
|
39
69
|
|
|
40
70
|
it("classifies findings correctly across all 3 tiers", () => {
|
|
41
|
-
const findings = parseThreads(threadsContent);
|
|
71
|
+
const { findings } = parseThreads(threadsContent);
|
|
42
72
|
assert.equal(findings.length, 3);
|
|
43
73
|
|
|
44
74
|
const classified = classifyFindings(findings, diff);
|
package/src/post-review.ts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import { readFile, realpath } from "node:fs/promises";
|
|
2
2
|
import { resolve } from "node:path";
|
|
3
3
|
import { parseThreads } from "./parse-threads.ts";
|
|
4
|
-
import { classifyFindings } from "./diff-classifier.ts";
|
|
5
|
-
import type { Finding, ClassifiedFinding } from "./diff-classifier.ts";
|
|
4
|
+
import { type Finding, type ClassifiedFinding } from "./diff-classifier.ts";
|
|
6
5
|
import { type PrInfo, getPrInfo, execFileAsync } from "./graphql.ts";
|
|
7
6
|
import {
|
|
8
7
|
type PendingReview,
|
|
@@ -14,12 +13,13 @@ import {
|
|
|
14
13
|
updateReviewComment,
|
|
15
14
|
} from "./review-api.ts";
|
|
16
15
|
import {
|
|
17
|
-
escapeHtml,
|
|
18
16
|
isValidPath,
|
|
19
17
|
isRateLimitError,
|
|
20
18
|
findingId,
|
|
21
19
|
embedFindingId,
|
|
22
20
|
extractFindingId,
|
|
21
|
+
buildReviewBody,
|
|
22
|
+
classifyAndLog,
|
|
23
23
|
} from "./review-helpers.ts";
|
|
24
24
|
|
|
25
25
|
export interface PostReviewOptions {
|
|
@@ -37,27 +37,7 @@ export interface PostReviewResult {
|
|
|
37
37
|
failed: string[];
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
const AI_TRAILER = "\n\n---\n🤖 Review generated by AI";
|
|
41
|
-
|
|
42
|
-
function buildReviewBody(
|
|
43
|
-
tier3Findings: ClassifiedFinding[],
|
|
44
|
-
owner: string,
|
|
45
|
-
name: string,
|
|
46
|
-
headOid: string,
|
|
47
|
-
): string {
|
|
48
|
-
let body = "";
|
|
49
|
-
for (const f of tier3Findings) {
|
|
50
|
-
if (!isValidPath(f.path)) {
|
|
51
|
-
console.warn(`WARN: Skipping finding with suspicious path: ${f.path}`);
|
|
52
|
-
continue;
|
|
53
|
-
}
|
|
54
|
-
const permalink = `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(name)}/blob/${headOid}/${f.path.split("/").map(encodeURIComponent).join("/")}#L${f.line}`;
|
|
55
|
-
// Neutralize all HTML tags — GitHub re-renders markdown inside <details>.
|
|
56
|
-
body += `<details>\n<summary><a href="${permalink}"><code>${escapeHtml(f.path)}:${f.line}</code></a></summary>\n\n${f.body}\n</details>\n\n`;
|
|
57
|
-
}
|
|
58
|
-
body += AI_TRAILER;
|
|
59
|
-
return body;
|
|
60
|
-
}
|
|
40
|
+
const MAX_INLINE_FINDINGS = 200;
|
|
61
41
|
|
|
62
42
|
async function updateExistingThreads(
|
|
63
43
|
existingReview: PendingReview,
|
|
@@ -79,9 +59,16 @@ async function updateExistingThreads(
|
|
|
79
59
|
}
|
|
80
60
|
}
|
|
81
61
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
62
|
+
const CONCURRENCY = 2;
|
|
63
|
+
for (let i = 0; i < toUpdate.length; i += CONCURRENCY) {
|
|
64
|
+
if (i > 0) await Bun.sleep(1000);
|
|
65
|
+
const batch = toUpdate.slice(i, i + CONCURRENCY);
|
|
66
|
+
await Promise.all(
|
|
67
|
+
batch.map(async ({ finding, commentId, id }) => {
|
|
68
|
+
const body = embedFindingId(finding.body, id);
|
|
69
|
+
await updateReviewComment(commentId, body);
|
|
70
|
+
}),
|
|
71
|
+
);
|
|
85
72
|
}
|
|
86
73
|
|
|
87
74
|
console.log(`Updated ${toUpdate.length} existing threads.`);
|
|
@@ -146,13 +133,20 @@ async function postInlineThreads(
|
|
|
146
133
|
(f) => !updatedIds.has(findingId(f.path, f.startLine, f.line, f.body)),
|
|
147
134
|
);
|
|
148
135
|
|
|
149
|
-
|
|
136
|
+
if (pending.length > MAX_INLINE_FINDINGS) {
|
|
137
|
+
console.warn(
|
|
138
|
+
`WARN: ${pending.length} inline findings exceed limit of ${MAX_INLINE_FINDINGS}. Truncating.`,
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
const capped = pending.slice(0, MAX_INLINE_FINDINGS);
|
|
142
|
+
|
|
143
|
+
for (let i = 0; i < capped.length; i += CONCURRENCY) {
|
|
150
144
|
if (i > 0) await Bun.sleep(1000);
|
|
151
|
-
const batch = pending.slice(i, i + CONCURRENCY);
|
|
145
|
+
const batch = capped.slice(i, i + CONCURRENCY);
|
|
152
146
|
const results = await Promise.all(
|
|
153
147
|
batch.map(async (f) => {
|
|
154
148
|
const id = findingId(f.path, f.startLine, f.line, f.body);
|
|
155
|
-
//
|
|
149
|
+
// Body rendered by GitHub's Markdown sanitizer; no escaping needed here
|
|
156
150
|
const body = embedFindingId(f.body, id);
|
|
157
151
|
const ok = await postWithRetry(reviewId, f, body);
|
|
158
152
|
return ok ? null : id;
|
|
@@ -165,31 +159,6 @@ async function postInlineThreads(
|
|
|
165
159
|
return failedFindings;
|
|
166
160
|
}
|
|
167
161
|
|
|
168
|
-
function classifyAndLog(
|
|
169
|
-
findings: Finding[],
|
|
170
|
-
diff: string,
|
|
171
|
-
): { tier1: ClassifiedFinding[]; tier2: ClassifiedFinding[]; tier3: ClassifiedFinding[] } {
|
|
172
|
-
const validated = findings.filter((f) => {
|
|
173
|
-
if (!isValidPath(f.path)) {
|
|
174
|
-
console.warn(`WARN: Skipping finding with suspicious path: ${f.path}`);
|
|
175
|
-
return false;
|
|
176
|
-
}
|
|
177
|
-
return true;
|
|
178
|
-
});
|
|
179
|
-
const classified = classifyFindings(validated, diff);
|
|
180
|
-
const tier1 = classified.filter((f) => f.tier === 1);
|
|
181
|
-
const tier2 = classified.filter((f) => f.tier === 2);
|
|
182
|
-
const tier3 = classified.filter((f) => f.tier === 3);
|
|
183
|
-
|
|
184
|
-
for (const f of tier2) {
|
|
185
|
-
console.warn(`WARN: ${f.path}:${f.line} demoted to file-level`);
|
|
186
|
-
}
|
|
187
|
-
for (const f of tier3) {
|
|
188
|
-
console.warn(`WARN: ${f.path}:${f.line} demoted to review body`);
|
|
189
|
-
}
|
|
190
|
-
return { tier1, tier2, tier3 };
|
|
191
|
-
}
|
|
192
|
-
|
|
193
162
|
async function ensureReview(
|
|
194
163
|
prNodeId: string,
|
|
195
164
|
headOid: string,
|
|
@@ -198,18 +167,22 @@ async function ensureReview(
|
|
|
198
167
|
tier3: ClassifiedFinding[],
|
|
199
168
|
owner: string,
|
|
200
169
|
name: string,
|
|
170
|
+
summary?: string,
|
|
201
171
|
): Promise<{ reviewId: string; updatedFindings: Set<string> }> {
|
|
202
172
|
const existingReview = await findPendingReview(prNodeId);
|
|
203
173
|
if (existingReview) {
|
|
204
174
|
console.log(`Found existing pending review: ${existingReview.id}`);
|
|
205
|
-
await updateReviewBody(
|
|
175
|
+
await updateReviewBody(
|
|
176
|
+
existingReview.id,
|
|
177
|
+
buildReviewBody(tier3, owner, name, headOid, summary),
|
|
178
|
+
);
|
|
206
179
|
const updated = await updateExistingThreads(existingReview, [...tier1, ...tier2]);
|
|
207
180
|
return { reviewId: existingReview.id, updatedFindings: updated };
|
|
208
181
|
}
|
|
209
182
|
const reviewId = await createPendingReview(
|
|
210
183
|
prNodeId,
|
|
211
184
|
headOid,
|
|
212
|
-
buildReviewBody(tier3, owner, name, headOid),
|
|
185
|
+
buildReviewBody(tier3, owner, name, headOid, summary),
|
|
213
186
|
);
|
|
214
187
|
console.log(`Created pending review: ${reviewId}`);
|
|
215
188
|
return { reviewId, updatedFindings: new Set() };
|
|
@@ -220,10 +193,11 @@ async function loadAndValidatePr(
|
|
|
220
193
|
prNumber: number,
|
|
221
194
|
expectedSha: string | undefined,
|
|
222
195
|
cwd: string | undefined,
|
|
223
|
-
): Promise<{ findings: Finding[]; prInfo: PrInfo } | null> {
|
|
196
|
+
): Promise<{ findings: Finding[]; prInfo: PrInfo; summary?: string } | null> {
|
|
224
197
|
if (!isValidPath(threadsPath)) {
|
|
225
198
|
throw new Error(`Invalid threadsPath: ${threadsPath}`);
|
|
226
199
|
}
|
|
200
|
+
// cwd is a trusted parameter set by the plugin framework (not user-supplied)
|
|
227
201
|
const base = await realpath(resolve(cwd ?? process.cwd()));
|
|
228
202
|
const candidatePath = resolve(base, threadsPath);
|
|
229
203
|
let resolved: string;
|
|
@@ -236,8 +210,8 @@ async function loadAndValidatePr(
|
|
|
236
210
|
throw new Error(`threadsPath escapes working directory: ${threadsPath}`);
|
|
237
211
|
}
|
|
238
212
|
const content = await readFile(resolved, "utf8");
|
|
239
|
-
const findings = parseThreads(content);
|
|
240
|
-
if (findings.length === 0) return null;
|
|
213
|
+
const { findings, summary } = parseThreads(content);
|
|
214
|
+
if (findings.length === 0 && (summary === undefined || summary === "")) return null;
|
|
241
215
|
|
|
242
216
|
const prInfo = await getPrInfo(prNumber, { cwd });
|
|
243
217
|
if (prInfo.state !== "OPEN") {
|
|
@@ -249,7 +223,7 @@ async function loadAndValidatePr(
|
|
|
249
223
|
throw new Error(`ABORT: PR head moved (expected ${resolvedSha}, got ${prInfo.headOid}).`);
|
|
250
224
|
}
|
|
251
225
|
|
|
252
|
-
return { findings, prInfo };
|
|
226
|
+
return { findings, prInfo, summary };
|
|
253
227
|
}
|
|
254
228
|
|
|
255
229
|
async function postFindings(
|
|
@@ -258,6 +232,7 @@ async function postFindings(
|
|
|
258
232
|
tier3: ClassifiedFinding[],
|
|
259
233
|
prInfo: PrInfo,
|
|
260
234
|
skipIds: string[],
|
|
235
|
+
summary?: string,
|
|
261
236
|
): Promise<PostReviewResult> {
|
|
262
237
|
const { owner, name, prNodeId, headOid } = prInfo;
|
|
263
238
|
const { reviewId, updatedFindings } = await ensureReview(
|
|
@@ -268,6 +243,7 @@ async function postFindings(
|
|
|
268
243
|
tier3,
|
|
269
244
|
owner,
|
|
270
245
|
name,
|
|
246
|
+
summary,
|
|
271
247
|
);
|
|
272
248
|
|
|
273
249
|
const skipSet = new Set(skipIds);
|
|
@@ -280,28 +256,28 @@ async function postFindings(
|
|
|
280
256
|
const skipped = inlineFindings.filter((f) =>
|
|
281
257
|
updatedFindings.has(findingId(f.path, f.startLine, f.line, f.body)),
|
|
282
258
|
).length;
|
|
283
|
-
const posted = inlineFindings.length - skipped - failedIds.length;
|
|
259
|
+
const pending = inlineFindings.length - skipped;
|
|
260
|
+
const truncatedCount = Math.max(0, pending - MAX_INLINE_FINDINGS);
|
|
261
|
+
const posted = Math.min(pending, MAX_INLINE_FINDINGS) - failedIds.length;
|
|
284
262
|
const totalFindings = [...tier1, ...tier2].length;
|
|
285
263
|
const skippedByUser = totalFindings - inlineFindings.length;
|
|
286
|
-
const summary =
|
|
287
|
-
`Posted ${posted}/${totalFindings} inline threads (${skipped} up-to-date, ${skippedByUser} skipped, ${failedIds.length} failed). ` +
|
|
264
|
+
const statusSummary =
|
|
265
|
+
`Posted ${posted}/${totalFindings} inline threads (${skipped} up-to-date, ${skippedByUser} skipped, ${failedIds.length} failed${truncatedCount > 0 ? `, ${truncatedCount} truncated` : ""}). ` +
|
|
288
266
|
`${tier3.length} findings in review body.`;
|
|
289
267
|
|
|
290
|
-
console.log(summary);
|
|
268
|
+
console.log(statusSummary);
|
|
291
269
|
|
|
292
|
-
return { summary, failed: failedIds };
|
|
270
|
+
return { summary: statusSummary, failed: failedIds };
|
|
293
271
|
}
|
|
294
272
|
|
|
295
|
-
/**
|
|
296
|
-
* Post a review to a GitHub PR.
|
|
297
|
-
*/
|
|
298
273
|
export async function postReview(opts: PostReviewOptions): Promise<PostReviewResult> {
|
|
299
274
|
const { threadsPath, prNumber, dryRun = false, skipIds = [], expectedSha } = opts;
|
|
300
275
|
|
|
301
276
|
const result = await loadAndValidatePr(threadsPath, prNumber, expectedSha, opts.cwd);
|
|
302
277
|
if (!result) return { summary: "No findings to post.", failed: [] };
|
|
303
278
|
|
|
304
|
-
const { findings, prInfo } = result;
|
|
279
|
+
const { findings, prInfo, summary: reviewSummary } = result;
|
|
280
|
+
|
|
305
281
|
const diff =
|
|
306
282
|
opts.diffText ??
|
|
307
283
|
(
|
|
@@ -321,5 +297,5 @@ export async function postReview(opts: PostReviewOptions): Promise<PostReviewRes
|
|
|
321
297
|
};
|
|
322
298
|
}
|
|
323
299
|
|
|
324
|
-
return postFindings(tier1, tier2, tier3, prInfo, skipIds);
|
|
300
|
+
return postFindings(tier1, tier2, tier3, prInfo, skipIds, reviewSummary);
|
|
325
301
|
}
|
package/src/review-api.ts
CHANGED
|
@@ -64,7 +64,15 @@ async function fetchRemainingComments(
|
|
|
64
64
|
const comments: ReviewComment[] = [];
|
|
65
65
|
let pageInfo = initialPageInfo;
|
|
66
66
|
let pages = 0;
|
|
67
|
+
// 5 minute aggregate timeout for pagination
|
|
68
|
+
const deadline = Date.now() + 5 * 60 * 1000;
|
|
67
69
|
while (pageInfo.hasNextPage && pages++ < MAX_PAGES) {
|
|
70
|
+
if (Date.now() > deadline) {
|
|
71
|
+
console.warn(
|
|
72
|
+
`WARN: Aggregate timeout reached after ${pages} pages. Returning partial results.`,
|
|
73
|
+
);
|
|
74
|
+
break;
|
|
75
|
+
}
|
|
68
76
|
const page = await graphql<CommentsPage>(
|
|
69
77
|
`
|
|
70
78
|
query ($reviewId: ID!, $after: String!) {
|
package/src/review-helpers.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
+
import { classifyFindings, type Finding, type ClassifiedFinding } from "./diff-classifier.ts";
|
|
2
3
|
|
|
3
4
|
const FINDING_ID_RE = /<!-- finding:([a-f0-9]+) -->/u;
|
|
5
|
+
const AI_TRAILER = "\n\n---\n🤖 Review generated by AI";
|
|
6
|
+
const GITHUB_NAME_RE = /^[a-zA-Z0-9](?:[a-zA-Z0-9._-]*[a-zA-Z0-9])?$/u;
|
|
4
7
|
|
|
5
8
|
export function escapeHtml(s: string): string {
|
|
6
9
|
return s
|
|
@@ -63,3 +66,66 @@ export function extractFindingId(body: string): string | null {
|
|
|
63
66
|
const match = FINDING_ID_RE.exec(body);
|
|
64
67
|
return match ? match[1] : null;
|
|
65
68
|
}
|
|
69
|
+
|
|
70
|
+
/** @internal Not part of the public API — subject to change without notice. */
|
|
71
|
+
export function buildReviewBody(
|
|
72
|
+
tier3Findings: ClassifiedFinding[],
|
|
73
|
+
owner: string,
|
|
74
|
+
name: string,
|
|
75
|
+
headOid: string,
|
|
76
|
+
summary?: string,
|
|
77
|
+
): string {
|
|
78
|
+
const validOid = /^[0-9a-f]{40,64}$/u.test(headOid);
|
|
79
|
+
if (!validOid) {
|
|
80
|
+
console.warn(`WARN: Invalid headOid format (${headOid}). Skipping permalinks.`);
|
|
81
|
+
}
|
|
82
|
+
const validRepo = GITHUB_NAME_RE.test(owner) && GITHUB_NAME_RE.test(name);
|
|
83
|
+
if (!validRepo) {
|
|
84
|
+
console.warn(`WARN: Invalid owner/name format (${owner}/${name}). Skipping permalinks.`);
|
|
85
|
+
}
|
|
86
|
+
let body = "";
|
|
87
|
+
if (summary !== undefined && summary !== "") {
|
|
88
|
+
// Trust boundary: summary is AI-generated Markdown from the formatter agent,
|
|
89
|
+
// rendered by GitHub's Markdown sanitizer (same trust level as finding bodies).
|
|
90
|
+
body += summary + "\n\n";
|
|
91
|
+
}
|
|
92
|
+
for (const f of tier3Findings) {
|
|
93
|
+
if (!isValidPath(f.path)) {
|
|
94
|
+
console.warn(`WARN: Skipping finding with suspicious path: ${f.path}`);
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
if (validOid && validRepo) {
|
|
98
|
+
const permalink = `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(name)}/blob/${headOid}/${f.path.split("/").map(encodeURIComponent).join("/")}#L${f.line}`;
|
|
99
|
+
body += `<details>\n<summary><a href="${permalink}"><code>${escapeHtml(f.path)}:${f.line}</code></a></summary>\n\n${f.body}\n</details>\n\n`;
|
|
100
|
+
} else {
|
|
101
|
+
body += `<details>\n<summary><code>${escapeHtml(f.path)}:${f.line}</code></summary>\n\n${f.body}\n</details>\n\n`;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
body += AI_TRAILER;
|
|
105
|
+
return body;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
export function classifyAndLog(
|
|
109
|
+
findings: Finding[],
|
|
110
|
+
diff: string,
|
|
111
|
+
): { tier1: ClassifiedFinding[]; tier2: ClassifiedFinding[]; tier3: ClassifiedFinding[] } {
|
|
112
|
+
const validated = findings.filter((f) => {
|
|
113
|
+
if (!isValidPath(f.path)) {
|
|
114
|
+
console.warn(`WARN: Skipping finding with suspicious path: ${f.path}`);
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
return true;
|
|
118
|
+
});
|
|
119
|
+
const classified = classifyFindings(validated, diff);
|
|
120
|
+
const tier1 = classified.filter((f) => f.tier === 1);
|
|
121
|
+
const tier2 = classified.filter((f) => f.tier === 2);
|
|
122
|
+
const tier3 = classified.filter((f) => f.tier === 3);
|
|
123
|
+
|
|
124
|
+
for (const f of tier2) {
|
|
125
|
+
console.warn(`WARN: ${f.path}:${f.line} demoted to file-level`);
|
|
126
|
+
}
|
|
127
|
+
for (const f of tier3) {
|
|
128
|
+
console.warn(`WARN: ${f.path}:${f.line} demoted to review body`);
|
|
129
|
+
}
|
|
130
|
+
return { tier1, tier2, tier3 };
|
|
131
|
+
}
|