specvector 0.3.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,329 @@
1
+ /**
2
+ * Finding Merger & Deduplication for the Scalable Review Pipeline.
3
+ *
4
+ * Takes raw findings from BatchResult, deduplicates semantically similar
5
+ * findings, generalizes patterns that appear in 3+ files, sorts by severity,
6
+ * and produces a ReviewResult for the formatter.
7
+ *
8
+ * This is a pure function — no LLM calls, no IO.
9
+ */
10
+
11
+ import type { BatchResult, BatchError } from "./batcher";
12
+ import type {
13
+ ReviewFinding,
14
+ ReviewResult,
15
+ ReviewStats,
16
+ Severity,
17
+ } from "../types/review";
18
+ import { calculateStats, determineRecommendation } from "../types/review";
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Types
22
+ // ---------------------------------------------------------------------------
23
+
/**
 * Tunable thresholds for the finding merger.
 */
export interface MergerConfig {
  /** Minimum Jaccard similarity between two titles for them to count as duplicates (default: 0.7). */
  similarityThreshold: number;
  /** Number of affected files at which a deduplicated finding becomes a pattern (default: 3). */
  patternThreshold: number;
}

/** Thresholds applied when the caller does not override them. */
const DEFAULT_MERGER_CONFIG: MergerConfig = {
  similarityThreshold: 0.7,
  patternThreshold: 3,
};
36
+
/** Sort rank of each severity; a smaller rank sorts first. */
const SEVERITY_ORDER: Record<Severity, number> = {
  CRITICAL: 0,
  HIGH: 1,
  MEDIUM: 2,
  LOW: 3,
};
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Main Entry Point
47
+ // ---------------------------------------------------------------------------
48
+
/**
 * Turn the raw findings of a BatchResult into the final ReviewResult.
 *
 * Steps, in order: deduplicate similar findings, generalize findings that
 * span many files into patterns, sort by severity, then derive stats,
 * recommendation and summary from the sorted list.
 *
 * @param batchResult - Batch output; its `findings`, `errors` and `contextSources` are read.
 * @param filesReviewed - Copied unchanged into the result.
 * @param config - Optional overrides, spread over the default thresholds.
 * @returns The assembled review result.
 */
export function mergeFindings(
  batchResult: BatchResult,
  filesReviewed: number,
  config?: Partial<MergerConfig>,
): ReviewResult {
  const { similarityThreshold, patternThreshold }: MergerConfig = {
    ...DEFAULT_MERGER_CONFIG,
    ...config,
  };

  // deduplicate → generalize → sort
  const findings = sortFindings(
    generalizePatterns(
      deduplicateFindings(batchResult.findings, similarityThreshold),
      patternThreshold,
    ),
  );

  const stats = calculateStats(findings);
  const recommendation = determineRecommendation(stats);
  const summary = buildSummary(findings, batchResult.errors, stats);

  // An empty source list is reported as undefined rather than [].
  const contextSources =
    batchResult.contextSources.length > 0 ? batchResult.contextSources : undefined;

  return { findings, summary, recommendation, stats, filesReviewed, contextSources };
}
83
+
84
+ // ---------------------------------------------------------------------------
85
+ // Deduplication
86
+ // ---------------------------------------------------------------------------
87
+
/**
 * Working state for one group of similar findings during deduplication.
 */
interface FindingCluster {
  /** Finding that stands for the group; replaced when a member with a longer description is merged. */
  representative: ReviewFinding;
  /** Distinct file paths contributed by the findings merged into this group. */
  affectedFiles: string[];
}
97
+
/**
 * Deduplicate semantically similar findings.
 *
 * Findings are first put into a deterministic order (severity, then title,
 * then file) so that the order in which they arrived does not decide which
 * clusters form. Each finding then joins the first eligible cluster whose
 * representative it is similar to (see `areSimilarFindings`), or starts a new
 * cluster. The member with the longest description represents the cluster and
 * inherits a suggestion from the previous representative if it has none.
 *
 * A cluster is not eligible for a finding whose file it already covers.
 * Checking only the representative's file is not enough: once the
 * representative has been swapped to a finding from another file, a second,
 * likely distinct, finding from an already-covered file would otherwise be
 * absorbed and lost.
 *
 * @param findings - Raw findings. Neither the array nor its elements are mutated.
 * @param similarityThreshold - Minimum title similarity for two findings to merge.
 * @returns One finding per cluster. Clusters spanning more than one file carry
 *   an internal `_affectedFiles` list that `generalizePatterns` consumes.
 */
export function deduplicateFindings(
  findings: ReviewFinding[],
  similarityThreshold: number = DEFAULT_MERGER_CONFIG.similarityThreshold,
): ReviewFinding[] {
  if (findings.length === 0) return [];

  // Deterministic order: severity rank, then title, then file path.
  const sorted = [...findings].sort((a, b) => {
    const sevDiff = SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity];
    if (sevDiff !== 0) return sevDiff;
    const titleDiff = a.title.localeCompare(b.title);
    if (titleDiff !== 0) return titleDiff;
    return (a.file ?? "").localeCompare(b.file ?? "");
  });

  const clusters: FindingCluster[] = [];

  for (const finding of sorted) {
    const target = clusters.find(
      (cluster) =>
        // Never fold two findings from the same file into one cluster.
        !(finding.file && cluster.affectedFiles.includes(finding.file)) &&
        areSimilarFindings(finding, cluster.representative, similarityThreshold),
    );

    if (!target) {
      clusters.push({
        representative: { ...finding },
        affectedFiles: finding.file ? [finding.file] : [],
      });
      continue;
    }

    // The eligibility check above guarantees this file is not listed yet.
    if (finding.file) target.affectedFiles.push(finding.file);

    // Keep the most detailed description as the representative.
    if (finding.description.length > target.representative.description.length) {
      const previous = target.representative;
      target.representative = { ...finding };
      if (!target.representative.suggestion && previous.suggestion) {
        target.representative.suggestion = previous.suggestion;
      }
    }
  }

  // Convert clusters back to findings; multi-file clusters carry their file list.
  return clusters.map(({ representative, affectedFiles }) => {
    const finding: FindingWithFiles = { ...representative };
    if (affectedFiles.length > 1) {
      finding._affectedFiles = affectedFiles;
    }
    return finding;
  });
}
166
+
/**
 * A ReviewFinding plus the optional, pipeline-internal list of files a
 * deduplicated finding was seen in. Written by `deduplicateFindings`, read and
 * removed again by `generalizePatterns`.
 */
interface FindingWithFiles extends ReviewFinding {
  _affectedFiles?: string[];
}
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // Similarity
174
+ // ---------------------------------------------------------------------------
175
+
/**
 * Decide whether two findings are duplicates of each other.
 *
 * All of the following must hold:
 *   - equal severity
 *   - equal category (a missing category only matches a missing category)
 *   - they do not both name the same file
 *   - the Jaccard similarity of their titles is at least `threshold`
 *
 * @param a - First finding.
 * @param b - Second finding.
 * @param threshold - Minimum title similarity.
 * @returns true when the findings are candidates for merging.
 */
export function areSimilarFindings(
  a: ReviewFinding,
  b: ReviewFinding,
  threshold: number = DEFAULT_MERGER_CONFIG.similarityThreshold,
): boolean {
  // Two findings about one file are treated as separate issues.
  const pointAtSameFile = Boolean(a.file) && a.file === b.file;

  return (
    a.severity === b.severity &&
    (a.category ?? null) === (b.category ?? null) &&
    !pointAtSameFile &&
    jaccardSimilarity(a.title, b.title) >= threshold
  );
}
202
+
/**
 * Jaccard similarity of two strings over their sets of word tokens:
 * shared tokens divided by distinct tokens overall.
 *
 * @returns A value from 0 (nothing shared) to 1 (same token set). Two strings
 *   without any tokens score 1; exactly one without tokens scores 0.
 */
export function jaccardSimilarity(a: string, b: string): number {
  const left = tokenize(a);
  const right = tokenize(b);

  if (left.size === 0 || right.size === 0) {
    return left.size === right.size ? 1 : 0;
  }

  const shared = [...left].filter((word) => right.has(word)).length;
  // |A ∪ B| = |A| + |B| − |A ∩ B|
  return shared / (left.size + right.size - shared);
}
222
+
/**
 * Split text into its distinct lowercase words, where a word is a maximal
 * run of non-whitespace characters.
 */
function tokenize(text: string): Set<string> {
  const words = text.toLowerCase().match(/\S+/g) ?? [];
  return new Set(words);
}
234
+
235
+ // ---------------------------------------------------------------------------
236
+ // Pattern Generalization
237
+ // ---------------------------------------------------------------------------
238
+
/**
 * Generalize findings that appear in many files into pattern comments.
 *
 * The internal `_affectedFiles` list attached by deduplication is consumed
 * here and never present on the returned findings.
 *
 * - `threshold` or more files: the finding becomes a pattern. Its title gets
 *   a " (pattern)" suffix, the description lists every file, and `file` and
 *   `line` are cleared.
 * - Two or more files but fewer than `threshold`: the finding keeps its `file`,
 *   the description names every other affected file, and `line` is cleared.
 *   This covers any count below the threshold, not only exactly two, so a
 *   `patternThreshold` above 3 does not drop file references.
 * - Otherwise the finding is returned as a copy without the internal list.
 *
 * @param findings - Findings, typically the output of `deduplicateFindings`.
 * @param threshold - Minimum number of affected files for a pattern.
 * @returns New finding objects; the inputs are not mutated.
 */
export function generalizePatterns(
  findings: ReviewFinding[],
  threshold: number = DEFAULT_MERGER_CONFIG.patternThreshold,
): ReviewFinding[] {
  return findings.map((finding) => {
    // Split off the internal metadata so it never leaks past this stage.
    const { _affectedFiles: files, ...rest } = finding as FindingWithFiles;

    if (files && files.length >= threshold) {
      // Generalize to a pattern comment without a specific location.
      const pattern: ReviewFinding = {
        ...rest,
        title: `${finding.title} (pattern)`,
        description: `${finding.description}\n\nFound in ${files.length} files: ${files.join(", ")}`,
        file: undefined,
        line: undefined,
      };
      return pattern;
    }

    // Below threshold — keep the file reference.
    const result: ReviewFinding = { ...rest };

    if (files && files.length >= 2) {
      const otherFiles = files.filter((f) => f !== finding.file);
      if (otherFiles.length > 0) {
        result.description = `${finding.description}\n\nAlso found in: ${otherFiles.join(", ")}`;
        // Same treatment as the original two-file case: drop the line number
        // once the finding stands for several files.
        result.line = undefined;
      }
    }

    return result;
  });
}
283
+
284
+ // ---------------------------------------------------------------------------
285
+ // Sorting
286
+ // ---------------------------------------------------------------------------
287
+
/**
 * Return a sorted copy of the findings: most severe first
 * (CRITICAL, HIGH, MEDIUM, LOW), ties broken by title via localeCompare.
 * The input array is left untouched.
 */
export function sortFindings(findings: ReviewFinding[]): ReviewFinding[] {
  const ordered = findings.slice();
  ordered.sort((left, right) => {
    const rankDiff = SEVERITY_ORDER[left.severity] - SEVERITY_ORDER[right.severity];
    return rankDiff !== 0 ? rankDiff : left.title.localeCompare(right.title);
  });
  return ordered;
}
299
+
300
+ // ---------------------------------------------------------------------------
301
+ // Summary Builder
302
+ // ---------------------------------------------------------------------------
303
+
/**
 * Compose the one-line, human-readable summary of a merged review.
 *
 * The first sentence either states that nothing was found or gives the total
 * with a per-severity breakdown (severities with a zero count are omitted).
 * If any batch failed, a note with the number of failed batches and the
 * number of files they covered is appended.
 *
 * @param findings - Final findings; only the count is used.
 * @param errors - Failed batches; each contributes `filesAffected.length` files.
 * @param stats - Per-severity counts for the breakdown.
 * @returns The sentences joined by a single space.
 */
function buildSummary(findings: ReviewFinding[], errors: BatchError[], stats: ReviewStats): string {
  const parts: string[] = [];

  if (findings.length > 0) {
    const counts = [
      stats.critical > 0 ? `${stats.critical} critical` : "",
      stats.high > 0 ? `${stats.high} high` : "",
      stats.medium > 0 ? `${stats.medium} medium` : "",
      stats.low > 0 ? `${stats.low} low` : "",
    ].filter((entry) => entry !== "");
    parts.push(`Found ${findings.length} issue${findings.length === 1 ? "" : "s"}: ${counts.join(", ")}.`);
  } else {
    parts.push("No issues found. Code looks good to merge.");
  }

  if (errors.length > 0) {
    let totalAffected = 0;
    for (const failure of errors) {
      totalAffected += failure.filesAffected.length;
    }
    parts.push(
      `Note: ${errors.length} review batch${errors.length === 1 ? "" : "es"} failed (${totalAffected} file${totalAffected === 1 ? "" : "s"} not reviewed). Findings may be incomplete.`,
    );
  }

  return parts.join(" ");
}
@@ -17,6 +17,7 @@ import { createOutlineTool } from "../agent/tools/outline";
17
17
  import { createFindSymbolTool } from "../agent/tools/find-symbol";
18
18
  import { calculateStats, determineRecommendation } from "../types/review";
19
19
  import type { ReviewResult, ReviewFinding, Severity, ContextSource } from "../types/review";
20
+ import { parseReviewResponseWithFallback, REVIEW_JSON_INSTRUCTION } from "./json-parser";
20
21
  import type { Result } from "../types/result";
21
22
  import { ok, err } from "../types/result";
22
23
  import { loadConfig, getStrictnessModifier } from "../config";
@@ -96,9 +97,9 @@ export async function runReview(
96
97
  createFindSymbolTool({ workingDir: config.workingDir }),
97
98
  ];
98
99
 
99
- // Build system prompt with strictness modifier
100
+ // Build system prompt with strictness modifier and JSON instruction
100
101
  const strictnessGuidance = getStrictnessModifier(strictness);
101
- let systemPrompt = REVIEW_SYSTEM_PROMPT + `\n\n## Strictness Setting: ${strictness.toUpperCase()}\n${strictnessGuidance}`;
102
+ let systemPrompt = REVIEW_SYSTEM_PROMPT + REVIEW_JSON_INSTRUCTION + `\n\n## Strictness Setting: ${strictness.toUpperCase()}\n${strictnessGuidance}`;
102
103
 
103
104
  // Track context sources for citation
104
105
  const contextSources: ContextSource[] = [];
@@ -163,8 +164,8 @@ export async function runReview(
163
164
  });
164
165
  }
165
166
 
166
- // Parse the response into structured findings
167
- const reviewResult = parseReviewResponse(agentResult.value, diffSummary, contextSources);
167
+ // Parse the response into structured findings (JSON first, regex fallback)
168
+ const reviewResult = parseReviewResponseWithFallback(agentResult.value, diffSummary, contextSources);
168
169
 
169
170
  return ok(reviewResult);
170
171
  }
@@ -172,7 +173,7 @@ export async function runReview(
172
173
  /**
173
174
  * System prompt for the code review agent.
174
175
  */
175
- const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to catch REAL problems, not nitpick.
176
+ export const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to catch REAL problems, not nitpick.
176
177
 
177
178
  ## Tools Available
178
179
  - read_file: Read source files to understand context
@@ -181,21 +182,43 @@ const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to
181
182
  - get_outline: Get functions/classes in a file (fast overview)
182
183
  - find_symbol: Find where a function or class is defined
183
184
 
185
+ ## Tool Use Strategy
186
+ Before flagging any issue, you MUST verify your understanding:
187
+ 1. **Read the full file** for any function being changed — don't judge from diff alone
188
+ 2. **Use find_symbol** to trace how changed functions are called by other code
189
+ 3. **Use grep** to find other usages of modified functions or variables
190
+ 4. **Only flag an issue if you have verified it** by reading the surrounding context
191
+
184
192
  ## What to Look For (in priority order)
185
193
  1. **CRITICAL**: Security vulnerabilities, data loss, crashes
186
194
  2. **HIGH**: Bugs that WILL break functionality in production
187
195
  3. **MEDIUM**: Significant code quality issues (not style nits)
188
196
 
197
+ ## Business Logic Patterns to Detect
198
+ Focus on real logic errors that cause incorrect behavior:
199
+ - **Off-by-one errors**: Wrong boundary conditions, < vs <=, array index issues
200
+ - **Null/undefined handling**: Missing null checks on values that can be null
201
+ - **Race conditions**: Shared state without synchronization, async ordering bugs
202
+ - **Incorrect boolean logic**: Inverted conditions, wrong operator (AND vs OR)
203
+ - **Missing error paths**: Happy-path-only code that ignores failure cases in data flows
204
+ - **Wrong operator**: Using = instead of ==, + instead of -, incorrect comparisons
205
+ - **State management bugs**: Mutating shared state, stale closures, incorrect resets
206
+ - **Type coercion issues**: Implicit conversions causing unexpected behavior
207
+
189
208
  ## What NOT to Flag
190
209
  - Style preferences or "I would do it differently"
191
210
  - Theoretical performance issues without evidence
192
211
  - Missing edge case tests for working code
193
212
  - "Could be refactored" suggestions
194
213
  - Code that works but isn't perfect
214
+ - Naming convention preferences
215
+ - Comment formatting or missing comments
216
+ - Import ordering or grouping
195
217
 
196
218
  ## Key Principle
197
219
  Most PRs should have 0-2 findings. If you're finding 5+ issues, you're being too picky.
198
220
  Only flag issues you'd actually block a PR for in a real code review.
221
+ Verify every finding with tool use before reporting it.
199
222
 
200
223
  ## Response Format
201
224
  SUMMARY: [1-2 sentences - is this code ready to merge?]
@@ -228,15 +251,15 @@ ${diff.length > 15000 ? "\n(diff truncated, use tools to read full files if need
228
251
 
229
252
  ## Instructions
230
253
  1. First, understand what the changes are doing
231
- 2. Use tools to explore related code if needed (find usages, read implementations)
232
- 3. Identify any issues with the changes
254
+ 2. Use tools to explore related code read full files, trace call chains, check usages
255
+ 3. For each potential issue, verify it by reading surrounding context before flagging
233
256
  4. Provide your review in the specified format`;
234
257
  }
235
258
 
236
259
  /**
237
260
  * Parse the agent's response into structured findings.
238
261
  */
239
- function parseReviewResponse(response: string, diffSummary: string, contextSources: ContextSource[] = []): ReviewResult {
262
+ export function parseReviewResponse(response: string, diffSummary: string, contextSources: ContextSource[] = []): ReviewResult {
240
263
  const findings: ReviewFinding[] = [];
241
264
 
242
265
  // Extract summary