@eduardbar/drift 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@eduardbar/drift",
3
- "version": "0.7.0",
3
+ "version": "0.9.0",
4
4
  "description": "Detect silent technical debt left by AI-generated code",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
package/src/analyzer.ts CHANGED
@@ -1,5 +1,8 @@
1
1
  import * as fs from 'node:fs'
2
+ import * as crypto from 'node:crypto'
2
3
  import * as path from 'node:path'
4
+ import * as os from 'node:os'
5
+ import { execSync } from 'node:child_process'
3
6
  import {
4
7
  Project,
5
8
  SourceFile,
@@ -10,7 +13,11 @@ import {
10
13
  FunctionExpression,
11
14
  MethodDeclaration,
12
15
  } from 'ts-morph'
13
- import type { DriftIssue, FileReport, DriftConfig, LayerDefinition, ModuleBoundary } from './types.js'
16
+ import type {
17
+ DriftIssue, FileReport, DriftConfig, LayerDefinition, ModuleBoundary,
18
+ HistoricalAnalysis, TrendDataPoint, BlameAttribution, DriftTrendReport, DriftBlameReport,
19
+ } from './types.js'
20
+ import { buildReport } from './reporter.js'
14
21
 
15
22
  // Rules and their drift score weight
16
23
  export const RULE_WEIGHTS: Record<string, { severity: DriftIssue['severity']; weight: number }> = {
@@ -45,6 +52,8 @@ export const RULE_WEIGHTS: Record<string, { severity: DriftIssue['severity']; we
45
52
  'inconsistent-error-handling': { severity: 'warning', weight: 8 },
46
53
  'unnecessary-abstraction': { severity: 'warning', weight: 7 },
47
54
  'naming-inconsistency': { severity: 'warning', weight: 6 },
55
+ // Phase 8: semantic duplication
56
+ 'semantic-duplication': { severity: 'warning', weight: 12 },
48
57
  }
49
58
 
50
59
  type FunctionLike = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration
@@ -873,6 +882,123 @@ function calculateScore(issues: DriftIssue[]): number {
873
882
  return Math.min(100, raw)
874
883
  }
875
884
 
885
+ // ---------------------------------------------------------------------------
886
+ // Phase 8: Semantic duplication — AST fingerprinting helpers
887
+ // ---------------------------------------------------------------------------
888
+
889
type FunctionLikeNode = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration

/**
 * Normalize a function body to a canonical string (Type-2 clone detection).
 *
 * Parameter names become positional tokens (`P0`, `P1`, …), locally declared
 * names become `V0`, `V1`, … in order of first declaration, and literals are
 * collapsed to one token per literal kind. Two functions with identical
 * structure but different identifiers or literal values therefore serialize
 * to the same string.
 *
 * Names introduced through destructuring (`const { a, b } = x`, `([x, y]) => …`)
 * are mapped as well; previously only plain `const x = …` declarations were,
 * so clones that differed only in destructured names did not match.
 *
 * @param fn - Function-like node whose body is serialized.
 * @returns The canonical serialization, or `''` when the node has no body.
 */
function normalizeFunctionBody(fn: FunctionLikeNode): string {
  // Substitution map: local name → canonical token
  const subst = new Map<string, string>()

  // Parameters are mapped first so they win over same-named inner declarations.
  fn.getParameters().forEach((param, i) => {
    const name = param.getName()
    if (name && name !== '_') subst.set(name, `P${i}`)
  })

  // Locally declared names: plain variable declarations and every element of
  // a destructuring pattern (each element is its own BindingElement node).
  let varIdx = 0
  fn.forEachDescendant(node => {
    const declared =
      node.asKind(SyntaxKind.VariableDeclaration) ?? node.asKind(SyntaxKind.BindingElement)
    if (!declared) return

    // A name node that is itself a pattern is skipped here; its elements are
    // visited as separate BindingElement descendants.
    const nameNode = declared.getNameNode()
    if (nameNode.getKind() !== SyntaxKind.Identifier) return

    const name = nameNode.getText()
    if (!subst.has(name)) subst.set(name, `V${varIdx++}`)
  })

  function serializeNode(node: Node): string {
    switch (node.getKind()) {
      case SyntaxKind.Identifier: {
        const text = node.getText()
        return subst.get(text) ?? text // external refs (Math, console) kept as-is
      }
      case SyntaxKind.NumericLiteral:
        return 'NL'
      case SyntaxKind.StringLiteral:
      case SyntaxKind.NoSubstitutionTemplateLiteral:
        return 'SL'
      case SyntaxKind.TrueKeyword:
        return 'TRUE'
      case SyntaxKind.FalseKeyword:
        return 'FALSE'
      case SyntaxKind.NullKeyword:
        return 'NULL'
    }

    // Any other node: its kind name, followed by its serialized children.
    const kind = node.getKindName()
    const children = node.getChildren()
    if (children.length === 0) return kind

    return `${kind}(${children.map(serializeNode).join('|')})`
  }

  const body = fn.getBody()
  if (!body) return ''
  return serializeNode(body)
}
951
+
952
/**
 * Compute the fingerprint of a function: the hex-encoded SHA-256 digest of
 * its normalized body (see `normalizeFunctionBody`).
 */
function fingerprintFunction(fn: FunctionLikeNode): string {
  const hasher = crypto.createHash('sha256')
  hasher.update(normalizeFunctionBody(fn))
  return hasher.digest('hex')
}
957
+
958
/** Minimum number of lines a function body must span to be compared. */
const MIN_LINES = 8

/**
 * Best-effort display name for a function-like node.
 *
 * Uses the declared name of a function or method when there is one; for an
 * arrow function or anonymous function expression, falls back to the variable
 * or object property it is directly assigned to. Returns `'<anonymous>'` when
 * no name can be determined.
 */
function resolveFunctionName(node: FunctionLikeNode): string {
  const method = node.asKind(SyntaxKind.MethodDeclaration)
  if (method) return method.getName()

  const parent = node.getParent()
  return (
    node.asKind(SyntaxKind.FunctionDeclaration)?.getName() ??
    node.asKind(SyntaxKind.FunctionExpression)?.getName() ??
    parent?.asKind(SyntaxKind.VariableDeclaration)?.getName() ??
    parent?.asKind(SyntaxKind.PropertyAssignment)?.getName() ??
    '<anonymous>'
  )
}

/**
 * Return all function-like nodes from a SourceFile that are worth comparing:
 *   - the body spans at least MIN_LINES lines
 *   - the function is not a test-framework helper (describe/it/test/
 *     beforeEach/afterEach/beforeAll/afterAll), neither by its own name nor
 *     as a callback passed directly to a call of one of those helpers
 *     (including member forms such as `it.only(...)`)
 *
 * Results are grouped by node kind (declarations, function expressions,
 * arrows, methods), not ordered by source position.
 *
 * @param sf - Source file to scan.
 * @returns One record per retained function: the node, a display name, and
 *          the line/column of the node's start.
 */
function collectFunctions(sf: SourceFile): Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> {
  const testHelpers = new Set(['describe', 'it', 'test', 'beforeEach', 'afterEach', 'beforeAll', 'afterAll'])
  const results: Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> = []

  const kinds = [
    SyntaxKind.FunctionDeclaration,
    SyntaxKind.FunctionExpression,
    SyntaxKind.ArrowFunction,
    SyntaxKind.MethodDeclaration,
  ] as const

  for (const kind of kinds) {
    for (const node of sf.getDescendantsOfKind(kind)) {
      const fn = node as FunctionLikeNode
      const body = fn.getBody()
      if (!body) continue

      const lineCount = body.getEndLineNumber() - body.getStartLineNumber() + 1
      if (lineCount < MIN_LINES) continue

      // Skip test-framework helpers declared under their own name
      const name = resolveFunctionName(fn)
      if (testHelpers.has(name)) continue

      // Skip callbacks handed to a test helper, e.g. describe('x', () => { ... }).
      // Only the leading identifier of the callee is compared so that
      // `it.only(...)` / `describe.skip(...)` are covered too.
      const call = node.getParent()?.asKind(SyntaxKind.CallExpression)
      if (call) {
        const callee = call.getExpression()
        const calleeRoot = callee.getText().split('.')[0] ?? ''
        if (callee !== node && testHelpers.has(calleeRoot)) continue
      }

      const lineInfo = sf.getLineAndColumnAtPos(node.getStart())
      results.push({ fn, name, line: lineInfo.line, col: lineInfo.column })
    }
  }

  return results
}
1001
+
876
1002
  // ---------------------------------------------------------------------------
877
1003
  // Public API
878
1004
  // ---------------------------------------------------------------------------
@@ -1284,5 +1410,539 @@ export function analyzeProject(targetPath: string, config?: DriftConfig): FileRe
1284
1410
  }
1285
1411
  }
1286
1412
 
1413
+ // ── Phase 8: semantic-duplication ────────────────────────────────────────
1414
+ // Build a fingerprint → [{filePath, fnName, line, col}] map across all files
1415
+ const fingerprintMap = new Map<string, Array<{ filePath: string; name: string; line: number; col: number }>>()
1416
+
1417
+ for (const sf of sourceFiles) {
1418
+ const sfPath = sf.getFilePath()
1419
+ for (const { fn, name, line, col } of collectFunctions(sf)) {
1420
+ const fp = fingerprintFunction(fn)
1421
+ if (!fingerprintMap.has(fp)) fingerprintMap.set(fp, [])
1422
+ fingerprintMap.get(fp)!.push({ filePath: sfPath, name, line, col })
1423
+ }
1424
+ }
1425
+
1426
+ // For each fingerprint with 2+ functions: report each as a duplicate of the others
1427
+ for (const [, entries] of fingerprintMap) {
1428
+ if (entries.length < 2) continue
1429
+
1430
+ for (const entry of entries) {
1431
+ const report = reportByPath.get(entry.filePath)
1432
+ if (!report) continue
1433
+
1434
+ // Build the "duplicated in" list (all other locations)
1435
+ const others = entries
1436
+ .filter(e => e !== entry)
1437
+ .map(e => {
1438
+ const rel = path.relative(targetPath, e.filePath).replace(/\\/g, '/')
1439
+ return `${rel}:${e.line} (${e.name})`
1440
+ })
1441
+ .join(', ')
1442
+
1443
+ const weight = RULE_WEIGHTS['semantic-duplication']?.weight ?? 12
1444
+ report.issues.push({
1445
+ rule: 'semantic-duplication',
1446
+ severity: 'warning',
1447
+ message: `Function '${entry.name}' is semantically identical to: ${others}`,
1448
+ line: entry.line,
1449
+ column: entry.col,
1450
+ snippet: `function ${entry.name} — duplicated in ${entries.length - 1} other location${entries.length > 2 ? 's' : ''}`,
1451
+ })
1452
+ report.score = Math.min(100, report.score + weight)
1453
+ }
1454
+ }
1455
+
1287
1456
  return reports
1288
1457
  }
1458
+
1459
+ // ---------------------------------------------------------------------------
1460
+ // Git helpers
1461
+ // ---------------------------------------------------------------------------
1462
+
1463
/**
 * Analyse the file at `filePath` by loading it into a fresh, standalone
 * ts-morph project (no tsconfig files added, JS allowed) and handing the
 * resulting source file to `analyzeFile`.
 */
function analyzeFilePath(filePath: string): FileReport {
  const project = new Project({
    compilerOptions: { allowJs: true },
    skipAddingFilesFromTsConfig: true,
  })
  return analyzeFile(project.addSourceFileAtPath(filePath))
}
1472
+
1473
/**
 * Execute a git command synchronously and return its stdout, trimmed.
 *
 * The output limit is raised well above `execSync`'s 1 MiB default so that
 * `git show` / `git blame --porcelain` on large files does not fail with
 * ENOBUFS.
 *
 * NOTE: `cmd` is run through a shell as-is. Callers are responsible for
 * quoting any path or revision they interpolate into it.
 *
 * @param cmd - Full command line to run (e.g. `git rev-parse HEAD`).
 * @param cwd - Working directory for the command.
 * @returns Trimmed stdout of the command.
 * @throws Error with the command and the underlying message when the command
 *         exits non-zero or cannot be started.
 */
function execGit(cmd: string, cwd: string): string {
  try {
    const stdout = execSync(cmd, {
      cwd,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      maxBuffer: 64 * 1024 * 1024,
    })
    return stdout.trim()
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err)
    throw new Error(`Git command failed: ${cmd}\n${msg}`)
  }
}
1485
+
1486
/**
 * Ensure `cwd` lies inside a git work tree.
 * Any failure of the probing git command (including git being unavailable)
 * is reported as "not a git repository".
 */
function assertGitRepo(cwd: string): void {
  let insideWorkTree = true
  try {
    execGit('git rev-parse --is-inside-work-tree', cwd)
  } catch {
    insideWorkTree = false
  }
  if (!insideWorkTree) {
    throw new Error(`Directory is not a git repository: ${cwd}`)
  }
}
1497
+
1498
+ // ---------------------------------------------------------------------------
1499
+ // Historical analysis helpers
1500
+ // ---------------------------------------------------------------------------
1501
+
1502
/**
 * Analyse a single file as it existed at a given commit hash.
 * Writes the blob to a temp file, runs the analyzer on it, then cleans up.
 *
 * @param filePath - Path of the file; its location relative to `projectRoot`
 *                   is what is requested from git.
 * @param commitHash - Commit whose version of the file is analysed.
 * @param projectRoot - Directory git is run in.
 *                      NOTE(review): `git show <rev>:<path>` resolves `<path>`
 *                      from the repository root, so this presumably has to be
 *                      the repo root — confirm against callers.
 * @returns The report for the historical content, with `path` set to `filePath`.
 * @throws If git cannot produce the blob (e.g. the file does not exist at
 *         that commit).
 */
async function analyzeFileAtCommit(
  filePath: string,
  commitHash: string,
  projectRoot: string,
): Promise<FileReport> {
  const relPath = path.relative(projectRoot, filePath).replace(/\\/g, '/')
  // Quote the <rev>:<path> argument so paths containing spaces reach git as
  // one argument. (Paths containing `"` or `$` are still not shell-safe.)
  const blob = execGit(`git show "${commitHash}:${relPath}"`, projectRoot)

  // Keep the original extension: a .tsx blob written to a .ts file would be
  // parsed without JSX support.
  const ext = path.extname(filePath) || '.ts'
  const tmpFile = path.join(os.tmpdir(), `drift-${crypto.randomBytes(8).toString('hex')}${ext}`)
  try {
    fs.writeFileSync(tmpFile, blob, 'utf8')
    const report = analyzeFilePath(tmpFile)
    // Replace temp path with original for readable output
    return { ...report, path: filePath }
  } finally {
    try { fs.unlinkSync(tmpFile) } catch { /* ignore cleanup errors */ }
  }
}
1524
+
1525
/**
 * Analyse every .ts/.tsx file listed as changed by one commit.
 *
 * Commit metadata (hash, date, author, subject) is read from the first line
 * of `git show`; the changed file names follow after the first blank line.
 * Files whose historical analysis fails are left out of the result.
 * `averageScore` is 0 when no file could be analysed.
 */
async function analyzeSingleCommit(
  commitHash: string,
  targetPath: string,
): Promise<HistoricalAnalysis> {
  const output = execGit(
    `git show --name-only --format="%H|%ai|%an|%s" ${commitHash}`,
    targetPath,
  )

  const [metaLine = '', ...rest] = output.split('\n')

  // Metadata line: hash|date|author|subject (the subject may itself contain '|')
  const [hash, dateStr, author, ...msgParts] = metaLine.split('|')
  const message = msgParts.join('|').trim()
  const commitDate = new Date(dateStr ?? '')
  const resolvedHash = hash ?? commitHash

  // File names start after the first blank line following the metadata
  const separatorIdx = rest.findIndex(line => line.trim() === '')
  const changedFiles = separatorIdx === -1
    ? []
    : rest
        .slice(separatorIdx + 1)
        .filter(line => line.endsWith('.ts') || line.endsWith('.tsx'))
        .map(line => path.join(targetPath, line.trim()))

  const fileReports = await Promise.all(
    changedFiles.map(async file => {
      try {
        return await analyzeFileAtCommit(file, resolvedHash, targetPath)
      } catch {
        return null
      }
    }),
  )

  const validReports: FileReport[] = []
  let totalScore = 0
  for (const report of fileReports) {
    if (report === null) continue
    validReports.push(report)
    totalScore += report.score
  }
  const averageScore = validReports.length > 0 ? totalScore / validReports.length : 0

  return {
    commitHash: resolvedHash,
    commitDate,
    author: author ?? '',
    message,
    files: validReports,
    totalScore,
    averageScore,
  }
}
1573
+
1574
/**
 * Analyse the commits git reports since `sinceDate` (at most `maxCommits`).
 * Commits whose analysis fails are dropped; the rest are returned oldest
 * first.
 */
async function analyzeHistoricalCommits(
  sinceDate: Date,
  targetPath: string,
  maxCommits: number,
): Promise<HistoricalAnalysis[]> {
  assertGitRepo(targetPath)

  const logOutput = execGit(
    `git log --since="${sinceDate.toISOString()}" --format="%H" --max-count=${maxCommits}`,
    targetPath,
  )
  if (logOutput === '') return []

  const hashes = logOutput.split('\n').filter(Boolean)
  const settled = await Promise.all(
    hashes.map(async hash => {
      try {
        return await analyzeSingleCommit(hash, targetPath)
      } catch {
        return null
      }
    }),
  )

  const analyses: HistoricalAnalysis[] = []
  for (const item of settled) {
    if (item !== null) analyses.push(item)
  }
  analyses.sort((a, b) => a.commitDate.getTime() - b.commitDate.getTime())
  return analyses
}
1602
+
1603
+ // ---------------------------------------------------------------------------
1604
+ // TrendAnalyzer
1605
+ // ---------------------------------------------------------------------------
1606
+
1607
/**
 * Builds a drift trend for a project from its git history, and exposes the
 * small numeric helpers used to summarise that trend.
 */
export class TrendAnalyzer {
  private readonly projectPath: string
  private readonly config: DriftConfig | undefined

  constructor(projectPath: string, config?: DriftConfig) {
    this.projectPath = projectPath
    this.config = config
  }

  // --- Static utility methods -----------------------------------------------

  /**
   * Trailing moving average of `score`.
   *
   * Element `i` of the result is the mean of the last `windowSize` points
   * ending at `i` (fewer at the start of the series).
   *
   * @param data - Points in chronological order.
   * @param windowSize - Number of points per window; values below 1 (or NaN)
   *                     are treated as 1 so the result never contains NaN.
   */
  static calculateMovingAverage(data: TrendDataPoint[], windowSize: number): number[] {
    const size = windowSize >= 1 ? windowSize : 1
    return data.map((_, i) => {
      const start = Math.max(0, i - size + 1)
      const window = data.slice(start, i + 1)
      return window.reduce((s, p) => s + p.score, 0) / window.length
    })
  }

  /**
   * Least-squares line through `(index, score)`.
   *
   * @returns `slope` and `intercept` of the fitted line and its `r2`.
   *          With fewer than two points the slope is 0 and the intercept is
   *          the single score (or 0). When all scores are equal, `r2` is 1.
   */
  static linearRegression(data: TrendDataPoint[]): { slope: number; intercept: number; r2: number } {
    const n = data.length
    if (n < 2) return { slope: 0, intercept: data[0]?.score ?? 0, r2: 0 }

    const xs = data.map((_, i) => i)
    const ys = data.map(p => p.score)

    const xMean = xs.reduce((s, x) => s + x, 0) / n
    const yMean = ys.reduce((s, y) => s + y, 0) / n

    const ssXX = xs.reduce((s, x) => s + (x - xMean) ** 2, 0)
    const ssXY = xs.reduce((s, x, i) => s + (x - xMean) * ((ys[i] ?? yMean) - yMean), 0)
    const ssYY = ys.reduce((s, y) => s + (y - yMean) ** 2, 0)

    const slope = ssXX === 0 ? 0 : ssXY / ssXX
    const intercept = yMean - slope * xMean
    const r2 = ssYY === 0 ? 1 : (ssXY ** 2) / (ssXX * ssYY)

    return { slope, intercept, r2 }
  }

  /**
   * Generate a simple horizontal ASCII bar chart (one bar per data point).
   * Bars are scaled so the largest score (at least 1) fills 40 columns.
   */
  static generateTrendChart(data: TrendDataPoint[]): string {
    if (data.length === 0) return '(no data)'

    const maxScore = Math.max(...data.map(p => p.score), 1)
    const chartWidth = 40

    const lines = data.map(p => {
      // Clamp so a negative score cannot make String.repeat throw.
      const barLen = Math.max(0, Math.round((p.score / maxScore) * chartWidth))
      const bar = '█'.repeat(barLen)
      const dateStr = p.date.toISOString().slice(0, 10)
      return `${dateStr} │${bar.padEnd(chartWidth)} ${p.score.toFixed(1)}`
    })

    return lines.join('\n')
  }

  // --- Instance method -------------------------------------------------------

  /**
   * Analyse how drift evolved over the project's recent git history.
   *
   * @param options.period - Look-back window when `since` is not given
   *                         (default: `'month'`, i.e. 30 days).
   * @param options.since - Start date (anything `new Date()` parses); takes
   *                        precedence over `period`.
   * @param options.until - End date; commits dated after it are excluded from
   *                        the trend. A date-only value means midnight UTC of
   *                        that day. At most 100 commits are fetched before
   *                        this filter is applied.
   * @returns The current project report extended with the trend points and
   *          their linear regression.
   * @throws If the project is not a git repository, or `since`/`until` is not
   *         a parseable date.
   */
  async analyzeTrend(options: {
    period?: 'week' | 'month' | 'quarter' | 'year'
    since?: string
    until?: string
  }): Promise<DriftTrendReport> {
    assertGitRepo(this.projectPath)

    const periodDays: Record<string, number> = {
      week: 7, month: 30, quarter: 90, year: 365,
    }
    const days = periodDays[options.period ?? 'month'] ?? 30
    const sinceDate = options.since
      ? new Date(options.since)
      : new Date(Date.now() - days * 24 * 60 * 60 * 1000)
    if (Number.isNaN(sinceDate.getTime())) {
      throw new Error(`Invalid 'since' date: ${options.since}`)
    }

    const untilDate = options.until ? new Date(options.until) : undefined
    if (untilDate && Number.isNaN(untilDate.getTime())) {
      throw new Error(`Invalid 'until' date: ${options.until}`)
    }

    const fetched = await analyzeHistoricalCommits(sinceDate, this.projectPath, 100)
    const historicalAnalyses = untilDate
      ? fetched.filter(h => h.commitDate.getTime() <= untilDate.getTime())
      : fetched

    const trendPoints: TrendDataPoint[] = historicalAnalyses.map(h => ({
      date: h.commitDate,
      score: h.averageScore,
      fileCount: h.files.length,
      avgIssuesPerFile: h.files.length > 0
        ? h.files.reduce((s, f) => s + f.issues.length, 0) / h.files.length
        : 0,
    }))

    const regression = TrendAnalyzer.linearRegression(trendPoints)

    // Current state report
    const currentFiles = analyzeProject(this.projectPath, this.config)
    const baseReport = buildReport(this.projectPath, currentFiles)

    return {
      ...baseReport,
      trend: trendPoints,
      regression,
    }
  }
}
1705
+
1706
+ // ---------------------------------------------------------------------------
1707
+ // BlameAnalyzer
1708
+ // ---------------------------------------------------------------------------
1709
+
1710
/** One blamed source line: the commit that last touched it and who authored that commit. */
interface GitBlameEntry {
  hash: string
  author: string
  email: string
  line: string
}

/**
 * Parse `git blame --porcelain` output into one entry per blamed line, in
 * file order.
 *
 * In porcelain format the commit metadata (`author`, `author-mail`, …) is
 * only printed the first time a commit appears; later lines from the same
 * commit consist of just the header and the TAB-prefixed content line.
 * Metadata is therefore remembered per commit hash and reused, so every
 * entry carries its author and email.
 *
 * @param blameOutput - Raw stdout of `git blame --porcelain`.
 * @returns Entries with commit hash, author name, author email (angle
 *          brackets removed) and the line's content.
 */
function parseGitBlame(blameOutput: string): GitBlameEntry[] {
  // Header: "<hash> <orig-line> <final-line> [<num-lines>]".
  // 40 hex chars for SHA-1 repositories, 64 for SHA-256 ones.
  const headerPattern = /^([0-9a-f]{40,64})\s/
  const knownCommits = new Map<string, { author: string; email: string }>()

  const entries: GitBlameEntry[] = []
  const lines = blameOutput.split('\n')
  let i = 0

  while (i < lines.length) {
    const headerMatch = headerPattern.exec(lines[i] ?? '')
    if (!headerMatch) { i++; continue }

    const hash = headerMatch[1] ?? ''
    const known = knownCommits.get(hash)
    let author = known?.author ?? ''
    let email = known?.email ?? ''
    let codeLine = ''
    i++

    // Consume everything up to the next header line
    while (i < lines.length) {
      const l = lines[i] ?? ''
      if (headerPattern.test(l)) break
      if (l.startsWith('author ')) author = l.slice(7).trim()
      else if (l.startsWith('author-mail ')) email = l.slice(12).replace(/[<>]/g, '').trim()
      else if (l.startsWith('\t')) codeLine = l.slice(1)
      i++
    }

    knownCommits.set(hash, { author, email })
    entries.push({ hash, author, email, line: codeLine })
  }

  return entries
}
1749
+
1750
/**
 * Attributes drift issues to authors by combining the analyzer's per-line
 * issues with `git blame` output.
 */
export class BlameAnalyzer {
  private readonly projectPath: string
  private readonly config: DriftConfig | undefined

  constructor(projectPath: string, config?: DriftConfig) {
    this.projectPath = projectPath
    this.config = config
  }

  /** Fresh, zeroed attribution record for the author of a blame entry. */
  private static emptyAttribution(entry: GitBlameEntry): BlameAttribution {
    return {
      author: entry.author,
      email: entry.email,
      commits: 0,
      linesChanged: 0,
      issuesIntroduced: 0,
      avgScoreImpact: 0,
    }
  }

  /** All .ts/.tsx files below `targetPath` (recursive), skipping any path containing `node_modules`. */
  private static listTypeScriptFiles(targetPath: string): string[] {
    return fs
      .readdirSync(targetPath, { recursive: true, encoding: 'utf8' })
      .filter((f): f is string => (f.endsWith('.ts') || f.endsWith('.tsx')) && !f.includes('node_modules'))
      .map(f => path.join(targetPath, f))
  }

  /** Number of issues reported on each line of a file report. */
  private static countIssuesByLine(report: FileReport): Map<number, number> {
    const issuesByLine = new Map<number, number>()
    for (const issue of report.issues) {
      issuesByLine.set(issue.line, (issuesByLine.get(issue.line) ?? 0) + 1)
    }
    return issuesByLine
  }

  /**
   * Blame a single file: returns per-author attribution.
   *
   * `linesChanged` counts the lines last touched by the author, `commits`
   * the distinct commits behind those lines, `issuesIntroduced` the issues
   * located on those lines, and `avgScoreImpact` the file score weighted by
   * the author's share of lines.
   *
   * @param filePath - File to blame; a relative path is resolved against the
   *                   current working directory before git is run from the
   *                   file's own directory.
   * @returns Attributions sorted by `issuesIntroduced`, descending.
   * @throws If the file's directory is not in a git repository or blame fails.
   */
  static async analyzeFileBlame(filePath: string): Promise<BlameAttribution[]> {
    // Resolve first: git runs with cwd = the file's directory, so a relative
    // path would otherwise be looked up relative to that directory again.
    const absPath = path.resolve(filePath)
    const dir = path.dirname(absPath)
    assertGitRepo(dir)

    const blameOutput = execGit(`git blame --porcelain "${absPath}"`, dir)
    const entries = parseGitBlame(blameOutput)

    // Analyse issues in the file
    const report = analyzeFilePath(absPath)
    const issuesByLine = BlameAnalyzer.countIssuesByLine(report)

    // Aggregate by author; blame entries are in file order, so entry idx is line idx + 1
    const byAuthor = new Map<string, BlameAttribution>()
    const commitsByAuthor = new Map<string, Set<string>>()
    entries.forEach((entry, idx) => {
      const key = entry.email || entry.author
      let attr = byAuthor.get(key)
      if (!attr) {
        attr = BlameAnalyzer.emptyAttribution(entry)
        byAuthor.set(key, attr)
      }
      let hashes = commitsByAuthor.get(key)
      if (!hashes) {
        hashes = new Set()
        commitsByAuthor.set(key, hashes)
      }

      attr.linesChanged++
      hashes.add(entry.hash)
      attr.issuesIntroduced += issuesByLine.get(idx + 1) ?? 0
    })

    const total = entries.length || 1
    const results: BlameAttribution[] = []
    for (const [key, attr] of byAuthor) {
      attr.commits = commitsByAuthor.get(key)?.size ?? 0
      attr.avgScoreImpact = (attr.linesChanged / total) * report.score
      results.push(attr)
    }

    return results.sort((a, b) => b.issuesIntroduced - a.issuesIntroduced)
  }

  /**
   * Blame for a specific rule across all files in targetPath.
   *
   * Only issues produced by the per-file analysis are considered. For each
   * author, `issuesIntroduced` counts the rule's issues on lines they last
   * touched, `linesChanged` the distinct flagged lines, `commits` the distinct
   * commits behind those lines, and `avgScoreImpact` the summed rule weight.
   * Files that cannot be blamed (e.g. untracked) are skipped.
   *
   * @returns Attributions sorted by `issuesIntroduced`, descending.
   */
  static async analyzeRuleBlame(rule: string, targetPath: string): Promise<BlameAttribution[]> {
    assertGitRepo(targetPath)

    const combined = new Map<string, BlameAttribution>()
    const commitsByAuthor = new Map<string, Set<string>>()
    const flaggedLinesByAuthor = new Map<string, Set<string>>()
    const ruleWeight = RULE_WEIGHTS[rule]?.weight ?? 5

    for (const file of BlameAnalyzer.listTypeScriptFiles(targetPath)) {
      const report = analyzeFilePath(file)
      const ruleIssues = report.issues.filter(i => i.rule === rule)
      if (ruleIssues.length === 0) continue

      let blameEntries: GitBlameEntry[] = []
      try {
        const blameOutput = execGit(`git blame --porcelain "${file}"`, targetPath)
        blameEntries = parseGitBlame(blameOutput)
      } catch { continue }

      for (const issue of ruleIssues) {
        const entry = blameEntries[issue.line - 1]
        if (!entry) continue

        const key = entry.email || entry.author
        let attr = combined.get(key)
        if (!attr) {
          attr = BlameAnalyzer.emptyAttribution(entry)
          combined.set(key, attr)
          commitsByAuthor.set(key, new Set())
          flaggedLinesByAuthor.set(key, new Set())
        }

        attr.issuesIntroduced++
        attr.avgScoreImpact += ruleWeight
        commitsByAuthor.get(key)?.add(entry.hash)
        flaggedLinesByAuthor.get(key)?.add(`${file}:${issue.line}`)
      }
    }

    for (const [key, attr] of combined) {
      attr.commits = commitsByAuthor.get(key)?.size ?? 0
      attr.linesChanged = flaggedLinesByAuthor.get(key)?.size ?? 0
    }

    return Array.from(combined.values()).sort((a, b) => b.issuesIntroduced - a.issuesIntroduced)
  }

  /**
   * Overall blame across all files and rules.
   *
   * Every blamed line counts towards its author's `linesChanged` and
   * `commits`; lines carrying issues add to `issuesIntroduced` and add the
   * file's score divided by its line count to `avgScoreImpact`.
   * Files that cannot be blamed (e.g. untracked) are skipped.
   *
   * @returns Attributions sorted by `issuesIntroduced`, descending.
   */
  static async analyzeOverallBlame(targetPath: string): Promise<BlameAttribution[]> {
    assertGitRepo(targetPath)

    const combined = new Map<string, BlameAttribution>()
    const commitsByAuthor = new Map<string, Set<string>>()

    for (const file of BlameAnalyzer.listTypeScriptFiles(targetPath)) {
      let blameEntries: GitBlameEntry[] = []
      try {
        const blameOutput = execGit(`git blame --porcelain "${file}"`, targetPath)
        blameEntries = parseGitBlame(blameOutput)
      } catch { continue }

      const report = analyzeFilePath(file)
      const issuesByLine = BlameAnalyzer.countIssuesByLine(report)
      const perLineScore = report.score * (1 / (blameEntries.length || 1))

      blameEntries.forEach((entry, idx) => {
        const key = entry.email || entry.author
        let attr = combined.get(key)
        if (!attr) {
          attr = BlameAnalyzer.emptyAttribution(entry)
          combined.set(key, attr)
          commitsByAuthor.set(key, new Set())
        }

        attr.linesChanged++
        commitsByAuthor.get(key)?.add(entry.hash)

        const issuesHere = issuesByLine.get(idx + 1)
        if (issuesHere !== undefined) {
          attr.issuesIntroduced += issuesHere
          attr.avgScoreImpact += perLineScore
        }
      })
    }

    for (const [key, attr] of combined) {
      attr.commits = commitsByAuthor.get(key)?.size ?? 0
    }

    return Array.from(combined.values()).sort((a, b) => b.issuesIntroduced - a.issuesIntroduced)
  }

  // --- Instance method -------------------------------------------------------

  /**
   * Run a blame analysis and attach it to the current project report.
   *
   * @param options.target - `'file'` (requires `filePath`), `'rule'`
   *                         (requires `rule`) or `'overall'` (default). When
   *                         the required companion option is missing, the
   *                         overall analysis is run instead.
   * @param options.top - Keep only the first N attributions; ignored unless
   *                      it is a positive number.
   * @returns The current project report extended with the `blame` list.
   * @throws If the project path is not a git repository.
   */
  async analyzeBlame(options: {
    target?: 'file' | 'rule' | 'overall'
    top?: number
    filePath?: string
    rule?: string
  }): Promise<DriftBlameReport> {
    assertGitRepo(this.projectPath)

    let blame: BlameAttribution[] = []
    const mode = options.target ?? 'overall'

    if (mode === 'file' && options.filePath) {
      blame = await BlameAnalyzer.analyzeFileBlame(options.filePath)
    } else if (mode === 'rule' && options.rule) {
      blame = await BlameAnalyzer.analyzeRuleBlame(options.rule, this.projectPath)
    } else {
      blame = await BlameAnalyzer.analyzeOverallBlame(this.projectPath)
    }

    // A negative `top` would make slice() drop entries from the end instead.
    if (options.top !== undefined && options.top > 0) {
      blame = blame.slice(0, options.top)
    }

    const currentFiles = analyzeProject(this.projectPath, this.config)
    const baseReport = buildReport(this.projectPath, currentFiles)

    return { ...baseReport, blame }
  }
}