npm - specvector - Versions diffs - 0.3.1 → 0.6.1 - Mend

specvector 0.3.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +1 -1
package/src/config/index.ts +5 -5
package/src/index.ts +2 -2
package/src/mcp/mcp-client.ts +3 -2
package/src/pipeline/batcher.ts +543 -0
package/src/pipeline/classifier.ts +361 -0
package/src/pipeline/index.ts +34 -0
package/src/pipeline/merger.ts +329 -0
package/src/review/engine.ts +31 -8
package/src/review/json-parser.ts +283 -0
package/src/utils/redact.ts +125 -0

package/src/pipeline/merger.ts ADDED Viewed

@@ -0,0 +1,329 @@
+/**
+ * Finding Merger & Deduplication for the Scalable Review Pipeline.
+ *
+ * Takes raw findings from BatchResult, deduplicates semantically similar
+ * findings, generalizes patterns that appear in 3+ files, sorts by severity,
+ * and produces a ReviewResult for the formatter.
+ *
+ * This is a pure function — no LLM calls, no IO.
+ */
+import type { BatchResult, BatchError } from "./batcher";
+import type {
+  ReviewFinding,
+  ReviewResult,
+  ReviewStats,
+  Severity,
+} from "../types/review";
+import { calculateStats, determineRecommendation } from "../types/review";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * Configuration for the merger.
+ *
+ * mergeFindings accepts any subset of these fields and fills the rest from
+ * DEFAULT_MERGER_CONFIG.
+ */
+export interface MergerConfig {
+  /**
+   * Minimum word-level Jaccard similarity between two finding titles for them
+   * to be treated as duplicates; the comparison is inclusive (>=).
+   * (default: 0.7)
+   */
+  similarityThreshold: number;
+  /**
+   * Minimum number of affected files at which a deduplicated finding is
+   * rewritten into a file-less "(pattern)" finding; the comparison is
+   * inclusive (>=). (default: 3)
+   */
+  patternThreshold: number;
+}
+const DEFAULT_MERGER_CONFIG: MergerConfig = {
+  similarityThreshold: 0.7, // also the default `threshold` argument of the similarity/dedup helpers
+  patternThreshold: 3, // also the default `threshold` argument of generalizePatterns
+};
+/**
+ * Severity sort order (lower = higher priority).
+ *
+ * The sort comparators in this file subtract these ranks, so CRITICAL
+ * findings come first and LOW findings last.
+ */
+const SEVERITY_ORDER: Record<Severity, number> = {
+  CRITICAL: 0,
+  HIGH: 1,
+  MEDIUM: 2,
+  LOW: 3,
+};
+// ---------------------------------------------------------------------------
+// Main Entry Point
+// ---------------------------------------------------------------------------
+/**
+ * Merge findings from a BatchResult into a single ReviewResult.
+ *
+ * Pipeline: deduplicate → generalize patterns → sort → build ReviewResult.
+ *
+ * @param batchResult - Batch output; its `findings`, `errors` and
+ *   `contextSources` are the fields read here.
+ * @param filesReviewed - Copied unchanged into the returned result.
+ * @param config - Optional overrides, spread over DEFAULT_MERGER_CONFIG.
+ *   Because this is an object spread, a key that is present but set to
+ *   `undefined` replaces the default with `undefined`.
+ * @returns The sorted findings together with the stats, recommendation and
+ *   summary computed from that same sorted list. `contextSources` is
+ *   `undefined` when the batch reported none.
+ */
+export function mergeFindings(
+  batchResult: BatchResult,
+  filesReviewed: number,
+  config?: Partial<MergerConfig>,
+): ReviewResult {
+  const cfg: MergerConfig = { ...DEFAULT_MERGER_CONFIG, ...config };
+  // Pipeline: each stage returns a new array that feeds the next stage
+  const deduplicated = deduplicateFindings(batchResult.findings, cfg.similarityThreshold);
+  const generalized = generalizePatterns(deduplicated, cfg.patternThreshold);
+  const sorted = sortFindings(generalized);
+  // Compute stats, recommendation, and summary from the merged list
+  // (batch errors only influence the summary text)
+  const stats = calculateStats(sorted);
+  const recommendation = determineRecommendation(stats);
+  const summary = buildSummary(sorted, batchResult.errors, stats);
+  return {
+    findings: sorted,
+    summary,
+    recommendation,
+    stats,
+    filesReviewed,
+    contextSources:
+      batchResult.contextSources.length > 0
+        ? batchResult.contextSources
+        : undefined, // an empty list is reported as "no context sources"
+  };
+}
+// ---------------------------------------------------------------------------
+// Deduplication
+// ---------------------------------------------------------------------------
+/**
+ * Internal representation of a finding with collected file references.
+ *
+ * One cluster is built per group of findings that deduplicateFindings
+ * considers the same issue.
+ */
+interface FindingCluster {
+  /**
+   * Representative finding: a shallow copy of the member with the longest
+   * description seen so far (ties keep the earlier member).
+   */
+  representative: ReviewFinding;
+  /**
+   * Distinct file paths of all merged findings, in the order they were
+   * merged. Findings without a `file` contribute nothing.
+   */
+  affectedFiles: string[];
+}
+/**
+ * Deduplicate semantically similar findings.
+ *
+ * Groups findings into clusters by similarity, keeps the most detailed
+ * representative for each cluster, and collects all affected file paths.
+ *
+ * Clustering is greedy: after a deterministic sort, each finding joins the
+ * first existing cluster whose current representative passes
+ * areSimilarFindings; otherwise it starts a new cluster. The input array and
+ * its elements are not mutated (the array and every finding are copied).
+ *
+ * NOTE(review): similarity, including the "same file" exclusion, is checked
+ * against the cluster's current representative only, not against every file
+ * already collected in the cluster — confirm this is the intended behavior.
+ *
+ * @param findings - Raw findings to deduplicate.
+ * @param similarityThreshold - Title similarity cutoff forwarded to
+ *   areSimilarFindings.
+ * @returns One finding per cluster, in cluster-creation order. A result whose
+ *   cluster collected more than one file additionally carries the internal
+ *   `_affectedFiles` array for generalizePatterns to consume.
+ */
+export function deduplicateFindings(
+  findings: ReviewFinding[],
+  similarityThreshold: number = DEFAULT_MERGER_CONFIG.similarityThreshold,
+): ReviewFinding[] {
+  if (findings.length === 0) return [];
+  // Sort deterministically before clustering so that input order
+  // (which depends on non-deterministic Promise.allSettled resolution)
+  // does not affect which clusters form.
+  // Sort keys, in order: severity rank, title, file path (missing file sorts as "").
+  const sorted = [...findings].sort((a, b) => {
+    const sevDiff = SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity];
+    if (sevDiff !== 0) return sevDiff;
+    const titleDiff = a.title.localeCompare(b.title);
+    if (titleDiff !== 0) return titleDiff;
+    return (a.file ?? "").localeCompare(b.file ?? "");
+  });
+  const clusters: FindingCluster[] = [];
+  for (const finding of sorted) {
+    let merged = false;
+    for (const cluster of clusters) {
+      if (areSimilarFindings(finding, cluster.representative, similarityThreshold)) {
+        // Merge into existing cluster (record the file once, if the finding has one)
+        if (finding.file && !cluster.affectedFiles.includes(finding.file)) {
+          cluster.affectedFiles.push(finding.file);
+        }
+        // Keep the longer description as representative; after a swap the
+        // cluster's file/line are those of the new representative
+        if (finding.description.length > cluster.representative.description.length) {
+          const prev = cluster.representative;
+          const files = cluster.affectedFiles;
+          cluster.representative = { ...finding };
+          cluster.affectedFiles = files; // same array as before; the swap does not touch the file list
+          // Carry forward suggestion from previous representative if new one lacks it
+          if (!cluster.representative.suggestion && prev.suggestion) {
+            cluster.representative.suggestion = prev.suggestion;
+          }
+        }
+        merged = true;
+        break; // first matching cluster wins
+      }
+    }
+    if (!merged) {
+      clusters.push({
+        representative: { ...finding },
+        affectedFiles: finding.file ? [finding.file] : [],
+      });
+    }
+  }
+  // Convert clusters back to findings, attaching affectedFiles metadata
+  return clusters.map((cluster) => {
+    const finding = { ...cluster.representative };
+    if (cluster.affectedFiles.length > 1) {
+      // Store affected files for pattern generalization
+      (finding as FindingWithFiles)._affectedFiles = cluster.affectedFiles;
+    }
+    return finding;
+  });
+}
+/**
+ * Internal extension to carry affected files through the pipeline.
+ *
+ * `_affectedFiles` is set by deduplicateFindings only when a cluster spans
+ * more than one file, and is deleted again by generalizePatterns.
+ */
+interface FindingWithFiles extends ReviewFinding {
+  _affectedFiles?: string[];
+}
+// ---------------------------------------------------------------------------
+// Similarity
+// ---------------------------------------------------------------------------
+/**
+ * Check if two findings are semantically similar (candidates for deduplication).
+ *
+ * Criteria:
+ * - Same severity
+ * - Same category (a missing category is treated as null, so two findings
+ *   without a category match each other)
+ * - Title Jaccard similarity >= threshold
+ * - Not the same file: rejected only when both findings have a non-empty
+ *   `file` and the paths are identical; a finding without a file passes
+ *   this check
+ *
+ * @param a - First finding.
+ * @param b - Second finding.
+ * @param threshold - Inclusive lower bound for the title similarity.
+ * @returns `true` when all criteria hold.
+ */
+export function areSimilarFindings(
+  a: ReviewFinding,
+  b: ReviewFinding,
+  threshold: number = DEFAULT_MERGER_CONFIG.similarityThreshold,
+): boolean {
+  // Severity must match
+  if (a.severity !== b.severity) return false;
+  // Category must match (both undefined or both equal)
+  if ((a.category ?? null) !== (b.category ?? null)) return false;
+  // Don't merge findings about the same file (they're likely different issues)
+  if (a.file && b.file && a.file === b.file) return false;
+  // Title similarity via Jaccard (cheap checks above run first)
+  return jaccardSimilarity(a.title, b.title) >= threshold;
+}
+/**
+ * Compute Jaccard similarity between two strings based on word tokens.
+ * Returns a value between 0 (no overlap) and 1 (identical).
+ *
+ * The score is |A ∩ B| / |A ∪ B| over the two sets of distinct tokens, so
+ * repeated words and word order do not affect it.
+ *
+ * @param a - First string.
+ * @param b - Second string.
+ * @returns 1 when both strings yield no tokens, 0 when exactly one of them
+ *   yields no tokens, otherwise the ratio described above.
+ */
+export function jaccardSimilarity(a: string, b: string): number {
+  const wordsA = tokenize(a);
+  const wordsB = tokenize(b);
+  if (wordsA.size === 0 && wordsB.size === 0) return 1; // two empty inputs count as identical
+  if (wordsA.size === 0 || wordsB.size === 0) return 0; // also keeps the division below away from 0/0
+  let intersectionSize = 0;
+  for (const word of wordsA) {
+    if (wordsB.has(word)) intersectionSize++;
+  }
+  const unionSize = new Set([...wordsA, ...wordsB]).size;
+  return intersectionSize / unionSize;
+}
+/**
+ * Tokenize a string into a set of lowercase words.
+ *
+ * The text is lowercased and split on runs of whitespace; empty pieces
+ * (from leading/trailing whitespace) are dropped. Punctuation is not
+ * stripped, so "check" and "check." are distinct tokens.
+ *
+ * @param text - Input string.
+ * @returns The distinct tokens; empty for an empty or whitespace-only string.
+ */
+function tokenize(text: string): Set<string> {
+  return new Set(
+    text
+      .toLowerCase()
+      .split(/\s+/)
+      .filter((w) => w.length > 0),
+  );
+}
+// ---------------------------------------------------------------------------
+// Pattern Generalization
+// ---------------------------------------------------------------------------
+/**
+ * Generalize findings that appear in many files into pattern comments.
+ *
+ * If a finding has been deduplicated across >= threshold files, it becomes
+ * a repo-wide pattern comment without a specific file reference.
+ *
+ * Per finding, based on the internal `_affectedFiles` list (present only when
+ * deduplication collected more than one file):
+ * - >= threshold files: the title gets a " (pattern)" suffix, the description
+ *   gets a "Found in N files: ..." paragraph, and `file`/`line` are cleared.
+ * - exactly 2 files (and below threshold): the other file is appended to the
+ *   description as "Also found in: ..." and `line` is cleared.
+ * - otherwise: the finding is copied unchanged.
+ * In every case the returned object is a new copy without `_affectedFiles`;
+ * the input findings are not mutated.
+ *
+ * NOTE(review): with a threshold above 3, a finding merged across
+ * 3..threshold-1 files falls into the "otherwise" case — the other files are
+ * not mentioned and `line` is kept even though the representative may have
+ * been swapped. With the default threshold of 3 this case cannot occur.
+ * Confirm whether this is intended.
+ *
+ * @param findings - Findings as returned by deduplicateFindings.
+ * @param threshold - Minimum affected-file count for generalization.
+ * @returns A new array with one output finding per input finding, same order.
+ */
+export function generalizePatterns(
+  findings: ReviewFinding[],
+  threshold: number = DEFAULT_MERGER_CONFIG.patternThreshold,
+): ReviewFinding[] {
+  return findings.map((finding) => {
+    const files = (finding as FindingWithFiles)._affectedFiles;
+    if (files && files.length >= threshold) {
+      // Generalize to pattern comment
+      const fileList = files.join(", ");
+      const result: ReviewFinding = {
+        ...finding,
+        title: `${finding.title} (pattern)`,
+        description: `${finding.description}\n\nFound in ${files.length} files: ${fileList}`,
+        file: undefined,
+        line: undefined,
+      };
+      // Clean internal metadata (copied in by the spread above)
+      delete (result as FindingWithFiles)._affectedFiles;
+      return result;
+    }
+    // Below threshold — keep file reference, clean metadata
+    const result = { ...finding };
+    delete (result as FindingWithFiles)._affectedFiles;
+    // If deduplicated across 2 files, note the other file in description
+    if (files && files.length === 2) {
+      const otherFile = files.find((f) => f !== finding.file);
+      if (otherFile) {
+        result.description = `${finding.description}\n\nAlso found in: ${otherFile}`;
+        // Clear line — it may reference the wrong file after representative swap
+        result.line = undefined;
+      }
+    }
+    return result;
+  });
+}
+// ---------------------------------------------------------------------------
+// Sorting
+// ---------------------------------------------------------------------------
+/**
+ * Sort findings by severity (CRITICAL > HIGH > MEDIUM > LOW),
+ * then alphabetically by title within the same severity.
+ *
+ * Titles are compared with `localeCompare` using its default locale and
+ * options. The input array is copied before sorting and is not mutated.
+ *
+ * @param findings - Findings to order.
+ * @returns A new, sorted array containing the same finding objects.
+ */
+export function sortFindings(findings: ReviewFinding[]): ReviewFinding[] {
+  return [...findings].sort((a, b) => {
+    const severityDiff = SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity];
+    if (severityDiff !== 0) return severityDiff;
+    return a.title.localeCompare(b.title);
+  });
+}
+// ---------------------------------------------------------------------------
+// Summary Builder
+// ---------------------------------------------------------------------------
+/**
+ * Build a human-readable summary from merged findings and batch errors.
+ *
+ * The first sentence is either "No issues found. ..." (empty findings) or a
+ * total taken from `findings.length` followed by the non-zero per-severity
+ * counts taken from `stats`. When `errors` is non-empty, a note about failed
+ * batches is appended — this also happens after the "No issues found"
+ * sentence.
+ *
+ * The "files not reviewed" number sums `filesAffected.length` across all
+ * errors as-is, so a path listed by more than one error is counted each time.
+ *
+ * @param findings - Final merged findings; only the length is used.
+ * @param errors - Failed batches; each contributes its `filesAffected` count.
+ * @param stats - Per-severity counts for the same findings.
+ * @returns The sentences joined by a single space.
+ */
+function buildSummary(findings: ReviewFinding[], errors: BatchError[], stats: ReviewStats): string {
+  const parts: string[] = [];
+  if (findings.length === 0) {
+    parts.push("No issues found. Code looks good to merge.");
+  } else {
+    const counts: string[] = [];
+    if (stats.critical > 0) counts.push(`${stats.critical} critical`);
+    if (stats.high > 0) counts.push(`${stats.high} high`);
+    if (stats.medium > 0) counts.push(`${stats.medium} medium`);
+    if (stats.low > 0) counts.push(`${stats.low} low`);
+    parts.push(`Found ${findings.length} issue${findings.length === 1 ? "" : "s"}: ${counts.join(", ")}.`);
+  }
+  if (errors.length > 0) {
+    const totalAffected = errors.reduce((sum, e) => sum + e.filesAffected.length, 0);
+    parts.push(
+      `Note: ${errors.length} review batch${errors.length === 1 ? "" : "es"} failed (${totalAffected} file${totalAffected === 1 ? "" : "s"} not reviewed). Findings may be incomplete.`,
+    );
+  }
+  return parts.join(" ");
+}

package/src/review/engine.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import { createOutlineTool } from "../agent/tools/outline";
 import { createFindSymbolTool } from "../agent/tools/find-symbol";
 import { calculateStats, determineRecommendation } from "../types/review";
 import type { ReviewResult, ReviewFinding, Severity, ContextSource } from "../types/review";
+import { parseReviewResponseWithFallback, REVIEW_JSON_INSTRUCTION } from "./json-parser";
 import type { Result } from "../types/result";
 import { ok, err } from "../types/result";
 import { loadConfig, getStrictnessModifier } from "../config";
@@ -96,9 +97,9 @@ export async function runReview(
     createFindSymbolTool({ workingDir: config.workingDir }),
   ];
-  // Build system prompt with strictness modifier
+  // Build system prompt with strictness modifier and JSON instruction
   const strictnessGuidance = getStrictnessModifier(strictness);
-  let systemPrompt = REVIEW_SYSTEM_PROMPT + `\n\n## Strictness Setting: ${strictness.toUpperCase()}\n${strictnessGuidance}`;
+  let systemPrompt = REVIEW_SYSTEM_PROMPT + REVIEW_JSON_INSTRUCTION + `\n\n## Strictness Setting: ${strictness.toUpperCase()}\n${strictnessGuidance}`;
   // Track context sources for citation
   const contextSources: ContextSource[] = [];
@@ -163,8 +164,8 @@ export async function runReview(
     });
   }
-  // Parse the response into structured findings
-  const reviewResult = parseReviewResponse(agentResult.value, diffSummary, contextSources);
+  // Parse the response into structured findings (JSON first, regex fallback)
+  const reviewResult = parseReviewResponseWithFallback(agentResult.value, diffSummary, contextSources);
   return ok(reviewResult);
 }
@@ -172,7 +173,7 @@ export async function runReview(
 /**
  * System prompt for the code review agent.
  */
-const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to catch REAL problems, not nitpick.
+export const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to catch REAL problems, not nitpick.
 ## Tools Available
 - read_file: Read source files to understand context
@@ -181,21 +182,43 @@ const REVIEW_SYSTEM_PROMPT = `You are a pragmatic code reviewer. Your job is to
 - get_outline: Get functions/classes in a file (fast overview)
 - find_symbol: Find where a function or class is defined
+## Tool Use Strategy
+Before flagging any issue, you MUST verify your understanding:
+1. **Read the full file** for any function being changed — don't judge from diff alone
+2. **Use find_symbol** to trace how changed functions are called by other code
+3. **Use grep** to find other usages of modified functions or variables
+4. **Only flag an issue if you have verified it** by reading the surrounding context
 ## What to Look For (in priority order)
 1. **CRITICAL**: Security vulnerabilities, data loss, crashes
 2. **HIGH**: Bugs that WILL break functionality in production
 3. **MEDIUM**: Significant code quality issues (not style nits)
+## Business Logic Patterns to Detect
+Focus on real logic errors that cause incorrect behavior:
+- **Off-by-one errors**: Wrong boundary conditions, < vs <=, array index issues
+- **Null/undefined handling**: Missing null checks on values that can be null
+- **Race conditions**: Shared state without synchronization, async ordering bugs
+- **Incorrect boolean logic**: Inverted conditions, wrong operator (AND vs OR)
+- **Missing error paths**: Happy-path-only code that ignores failure cases in data flows
+- **Wrong operator**: Using = instead of ==, + instead of -, incorrect comparisons
+- **State management bugs**: Mutating shared state, stale closures, incorrect resets
+- **Type coercion issues**: Implicit conversions causing unexpected behavior
 ## What NOT to Flag
 - Style preferences or "I would do it differently"
 - Theoretical performance issues without evidence
 - Missing edge case tests for working code
 - "Could be refactored" suggestions
 - Code that works but isn't perfect
+- Naming convention preferences
+- Comment formatting or missing comments
+- Import ordering or grouping
 ## Key Principle
 Most PRs should have 0-2 findings. If you're finding 5+ issues, you're being too picky.
 Only flag issues you'd actually block a PR for in a real code review.
+Verify every finding with tool use before reporting it.
 ## Response Format
 SUMMARY: [1-2 sentences - is this code ready to merge?]
@@ -228,15 +251,15 @@ ${diff.length > 15000 ? "\n(diff truncated, use tools to read full files if need
 ## Instructions
 1. First, understand what the changes are doing
-2. Use tools to explore related code if needed (find usages, read implementations)
-3. Identify any issues with the changes
+2. Use tools to explore related code — read full files, trace call chains, check usages
+3. For each potential issue, verify it by reading surrounding context before flagging
 4. Provide your review in the specified format`;
 }
 /**
  * Parse the agent's response into structured findings.
  */
-function parseReviewResponse(response: string, diffSummary: string, contextSources: ContextSource[] = []): ReviewResult {
+export function parseReviewResponse(response: string, diffSummary: string, contextSources: ContextSource[] = []): ReviewResult {
   const findings: ReviewFinding[] = [];
   // Extract summary