@eduardbar/drift 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -18,6 +18,18 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
18
18
 
19
19
  ---
20
20
 
21
+ ## [0.8.0] - 2026-02-24
22
+
23
+ ### Added
24
+ - `semantic-duplication` rule — Type-2 AST clone detection via SHA-256 fingerprinting
25
+ - Normalizes parameter names, local variable names, and literals before hashing — detects identical logic with different variable names
26
+ - Runs cross-file across the entire project; reports each duplicate pointing to all other locations
27
+ - Minimum threshold: functions with ≥ 8 body lines (reduces noise from trivial helpers)
28
+ - Skips test framework helpers (describe, it, test, beforeEach, afterEach, beforeAll, afterAll)
29
+ - RULE_WEIGHTS entry: severity `warning`, weight `12`
30
+
31
+ ---
32
+
21
33
  ## [0.7.0] - 2026-02-24
22
34
 
23
35
  ### Added
package/dist/analyzer.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import * as fs from 'node:fs';
2
+ import * as crypto from 'node:crypto';
2
3
  import * as path from 'node:path';
3
4
  import { Project, SyntaxKind, } from 'ts-morph';
4
5
  // Rules and their drift score weight
@@ -34,6 +35,8 @@ export const RULE_WEIGHTS = {
34
35
  'inconsistent-error-handling': { severity: 'warning', weight: 8 },
35
36
  'unnecessary-abstraction': { severity: 'warning', weight: 7 },
36
37
  'naming-inconsistency': { severity: 'warning', weight: 6 },
38
+ // Phase 8: semantic duplication
39
+ 'semantic-duplication': { severity: 'warning', weight: 12 },
37
40
  };
38
41
  function hasIgnoreComment(file, line) {
39
42
  const lines = file.getFullText().split('\n');
@@ -786,6 +789,105 @@ function calculateScore(issues) {
786
789
  }
787
790
  return Math.min(100, raw);
788
791
  }
792
+ /** Normalize a function body to a canonical string (Type-2 clone detection).
793
+ * Parameters become P<index>, variable declarations with a plain identifier name become V<n> (numbered in the order forEachDescendant visits them), numeric literals become NL, string and no-substitution template literals become SL, and true/false/null become TRUE/FALSE/NULL,
794
+ * so that two functions with identical logic but different local identifiers or numeric/string literal values produce the same string. Every other node is emitted as its kind name, wrapping its serialized children as Kind(a|b|...).
795
+ * Matching is by identifier text only: any Identifier node whose text equals a mapped name receives the token of that name, and unmapped identifiers keep their text. Returns an empty string when the function has no body.
796
+ */
797
+ function normalizeFunctionBody(fn) {
798
+ // Build a substitution map: localName → canonical token (keyed by identifier text)
799
+ const subst = new Map();
800
+ // Map parameters first: each becomes P<index>; a parameter named _ (or with an empty name) is left unmapped
801
+ for (const [i, param] of fn.getParameters().entries()) {
802
+ const name = param.getName();
803
+ if (name && name !== '_')
804
+ subst.set(name, `P${i}`);
805
+ }
806
+ // Map locally declared variables (VariableDeclaration); tokens V0, V1, ... are handed out in visiting order
807
+ let varIdx = 0;
808
+ fn.forEachDescendant(node => {
809
+ if (node.getKind() === SyntaxKind.VariableDeclaration) {
810
+ const nameNode = node.getNameNode();
811
+ // getNameNode() may be a BindingPattern (destructuring); only plain Identifier names are mapped here, patterns are skipped
812
+ if (nameNode.getKind() === SyntaxKind.Identifier) {
813
+ const name = nameNode.getText();
814
+ if (!subst.has(name)) // first mapping wins, so a name already mapped (e.g. by a parameter) keeps its token
815
+ subst.set(name, `V${varIdx++}`);
816
+ }
817
+ }
818
+ });
819
+ function serializeNode(node) { // recursive serializer; reads subst from the enclosing scope
820
+ const kind = node.getKindName();
821
+ switch (node.getKind()) {
822
+ case SyntaxKind.Identifier: {
823
+ const text = node.getText();
824
+ return subst.get(text) ?? text; // external refs (Math, console) kept as-is
825
+ }
826
+ case SyntaxKind.NumericLiteral:
827
+ return 'NL';
828
+ case SyntaxKind.StringLiteral:
829
+ case SyntaxKind.NoSubstitutionTemplateLiteral:
830
+ return 'SL';
831
+ case SyntaxKind.TrueKeyword:
832
+ return 'TRUE';
833
+ case SyntaxKind.FalseKeyword:
834
+ return 'FALSE';
835
+ case SyntaxKind.NullKeyword:
836
+ return 'NULL';
837
+ } // any other kind falls through to the generic serialization below
838
+ const children = node.getChildren();
839
+ if (children.length === 0) // leaf node: emit just the kind name
840
+ return kind;
841
+ const childStr = children.map(serializeNode).join('|');
842
+ return `${kind}(${childStr})`;
843
+ }
844
+ const body = fn.getBody();
845
+ if (!body) // nothing to serialize without a body
846
+ return '';
847
+ return serializeNode(body);
848
+ }
849
+ /** Return a SHA-256 fingerprint, as a hex string, of the normalized function body produced by normalizeFunctionBody. Functions whose normalized bodies are equal get equal fingerprints. */
850
+ function fingerprintFunction(fn) {
851
+ const normalized = normalizeFunctionBody(fn);
852
+ return crypto.createHash('sha256').update(normalized).digest('hex');
853
+ }
854
+ /** Return all function-like nodes (function declarations, function expressions, arrow functions, methods) from a SourceFile that are worth comparing:
855
+ * - At least MIN_LINES lines in their body, counted inclusively from the body start line to its end line
856
+ * - Not named like a test helper (describe/it/test/beforeEach/afterEach/beforeAll/afterAll); arrow functions and function expressions are always reported as <anonymous>, so this name filter only affects function declarations and methods
857
+ * Each result carries the node, its name, and the line and column of the node start. Results are grouped by node kind, not ordered by position in the file. */
858
+ const MIN_LINES = 8; // minimum inclusive line span of a function body for it to be collected
859
+ function collectFunctions(sf) {
860
+ const results = [];
861
+ const kinds = [
862
+ SyntaxKind.FunctionDeclaration,
863
+ SyntaxKind.FunctionExpression,
864
+ SyntaxKind.ArrowFunction,
865
+ SyntaxKind.MethodDeclaration,
866
+ ];
867
+ for (const kind of kinds) {
868
+ for (const node of sf.getDescendantsOfKind(kind)) {
869
+ const body = node.getBody();
870
+ if (!body) // nothing to fingerprint without a body
871
+ continue;
872
+ const start = body.getStartLineNumber();
873
+ const end = body.getEndLineNumber();
874
+ if (end - start + 1 < MIN_LINES) // body spans too few lines
875
+ continue;
876
+ // Resolve a display name, then skip functions named like test-framework helpers
877
+ const name = node.getKind() === SyntaxKind.FunctionDeclaration
878
+ ? node.getName() ?? '<anonymous>'
879
+ : node.getKind() === SyntaxKind.MethodDeclaration
880
+ ? node.getName()
881
+ : '<anonymous>';
882
+ if (['describe', 'it', 'test', 'beforeEach', 'afterEach', 'beforeAll', 'afterAll'].includes(name))
883
+ continue;
884
+ const pos = node.getStart();
885
+ const lineInfo = sf.getLineAndColumnAtPos(pos);
886
+ results.push({ fn: node, name, line: lineInfo.line, col: lineInfo.column });
887
+ }
888
+ }
889
+ return results;
890
+ }
789
891
  // ---------------------------------------------------------------------------
790
892
  // Public API
791
893
  // ---------------------------------------------------------------------------
@@ -1162,6 +1264,46 @@ export function analyzeProject(targetPath, config) {
1162
1264
  }
1163
1265
  }
1164
1266
  }
1267
+ // ── Phase 8: semantic-duplication ────────────────────────────────────────
1268
+ // Build a fingerprint → [{filePath, fnName, line, col}] map across all files
1269
+ const fingerprintMap = new Map();
1270
+ for (const sf of sourceFiles) {
1271
+ const sfPath = sf.getFilePath();
1272
+ for (const { fn, name, line, col } of collectFunctions(sf)) {
1273
+ const fp = fingerprintFunction(fn);
1274
+ if (!fingerprintMap.has(fp))
1275
+ fingerprintMap.set(fp, []);
1276
+ fingerprintMap.get(fp).push({ filePath: sfPath, name, line, col });
1277
+ }
1278
+ }
1279
+ // For each fingerprint with 2+ functions: report each as a duplicate of the others
1280
+ for (const [, entries] of fingerprintMap) {
1281
+ if (entries.length < 2)
1282
+ continue;
1283
+ for (const entry of entries) {
1284
+ const report = reportByPath.get(entry.filePath);
1285
+ if (!report)
1286
+ continue;
1287
+ // Build the "duplicated in" list (all other locations)
1288
+ const others = entries
1289
+ .filter(e => e !== entry)
1290
+ .map(e => {
1291
+ const rel = path.relative(targetPath, e.filePath).replace(/\\/g, '/');
1292
+ return `${rel}:${e.line} (${e.name})`;
1293
+ })
1294
+ .join(', ');
1295
+ const weight = RULE_WEIGHTS['semantic-duplication']?.weight ?? 12;
1296
+ report.issues.push({
1297
+ rule: 'semantic-duplication',
1298
+ severity: 'warning',
1299
+ message: `Function '${entry.name}' is semantically identical to: ${others}`,
1300
+ line: entry.line,
1301
+ column: entry.col,
1302
+ snippet: `function ${entry.name} — duplicated in ${entries.length - 1} other location${entries.length > 2 ? 's' : ''}`,
1303
+ });
1304
+ report.score = Math.min(100, report.score + weight);
1305
+ }
1306
+ }
1165
1307
  return reports;
1166
1308
  }
1167
1309
  //# sourceMappingURL=analyzer.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@eduardbar/drift",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Detect silent technical debt left by AI-generated code",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
package/src/analyzer.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import * as fs from 'node:fs'
2
+ import * as crypto from 'node:crypto'
2
3
  import * as path from 'node:path'
3
4
  import {
4
5
  Project,
@@ -45,6 +46,8 @@ export const RULE_WEIGHTS: Record<string, { severity: DriftIssue['severity']; we
45
46
  'inconsistent-error-handling': { severity: 'warning', weight: 8 },
46
47
  'unnecessary-abstraction': { severity: 'warning', weight: 7 },
47
48
  'naming-inconsistency': { severity: 'warning', weight: 6 },
49
+ // Phase 8: semantic duplication
50
+ 'semantic-duplication': { severity: 'warning', weight: 12 },
48
51
  }
49
52
 
50
53
  type FunctionLike = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration
@@ -873,6 +876,123 @@ function calculateScore(issues: DriftIssue[]): number {
873
876
  return Math.min(100, raw)
874
877
  }
875
878
 
879
+ // ---------------------------------------------------------------------------
880
+ // Phase 8: Semantic duplication — AST fingerprinting helpers
881
+ // ---------------------------------------------------------------------------
882
+
883
+ type FunctionLikeNode = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration // same union as the FunctionLike alias declared earlier in this file
884
+
885
+ /** Normalize a function body to a canonical string (Type-2 clone detection).
886
+ * Parameters become P<index>, variable declarations with a plain identifier name become V<n> (numbered in the order forEachDescendant visits them), numeric literals become NL, string and no-substitution template literals become SL, and true/false/null become TRUE/FALSE/NULL,
887
+ * so that two functions with identical logic but different local identifiers or numeric/string literal values produce the same string. Every other node is emitted as its kind name, wrapping its serialized children as Kind(a|b|...).
888
+ * Matching is by identifier text only: any Identifier node whose text equals a mapped name receives the token of that name, and unmapped identifiers keep their text. Returns an empty string when the function has no body.
889
+ */
890
+ function normalizeFunctionBody(fn: FunctionLikeNode): string {
891
+ // Build a substitution map: localName → canonical token (keyed by identifier text)
892
+ const subst = new Map<string, string>()
893
+
894
+ // Map parameters first: each becomes P<index>; a parameter named _ (or with an empty name) is left unmapped
895
+ for (const [i, param] of fn.getParameters().entries()) {
896
+ const name = param.getName()
897
+ if (name && name !== '_') subst.set(name, `P${i}`)
898
+ }
899
+
900
+ // Map locally declared variables (VariableDeclaration); tokens V0, V1, ... are handed out in visiting order
901
+ let varIdx = 0
902
+ fn.forEachDescendant(node => {
903
+ if (node.getKind() === SyntaxKind.VariableDeclaration) {
904
+ const nameNode = (node as import('ts-morph').VariableDeclaration).getNameNode()
905
+ // getNameNode() may be a BindingPattern (destructuring); only plain Identifier names are mapped here, patterns are skipped
906
+ if (nameNode.getKind() === SyntaxKind.Identifier) {
907
+ const name = nameNode.getText()
908
+ if (!subst.has(name)) subst.set(name, `V${varIdx++}`)
909
+ }
910
+ }
911
+ })
912
+
913
+ function serializeNode(node: Node): string { // recursive serializer; reads subst from the enclosing scope
914
+ const kind = node.getKindName()
915
+
916
+ switch (node.getKind()) {
917
+ case SyntaxKind.Identifier: {
918
+ const text = node.getText()
919
+ return subst.get(text) ?? text // external refs (Math, console) kept as-is
920
+ }
921
+ case SyntaxKind.NumericLiteral:
922
+ return 'NL'
923
+ case SyntaxKind.StringLiteral:
924
+ case SyntaxKind.NoSubstitutionTemplateLiteral:
925
+ return 'SL'
926
+ case SyntaxKind.TrueKeyword:
927
+ return 'TRUE'
928
+ case SyntaxKind.FalseKeyword:
929
+ return 'FALSE'
930
+ case SyntaxKind.NullKeyword:
931
+ return 'NULL'
932
+ } // any other kind falls through to the generic serialization below
933
+
934
+ const children = node.getChildren()
935
+ if (children.length === 0) return kind // leaf node: emit just the kind name
936
+
937
+ const childStr = children.map(serializeNode).join('|')
938
+ return `${kind}(${childStr})`
939
+ }
940
+
941
+ const body = fn.getBody()
942
+ if (!body) return '' // nothing to serialize without a body
943
+ return serializeNode(body)
944
+ }
945
+
946
+ /** Return a SHA-256 fingerprint, as a hex string, of the normalized function body produced by normalizeFunctionBody. Functions whose normalized bodies are equal get equal fingerprints. */
947
+ function fingerprintFunction(fn: FunctionLikeNode): string {
948
+ const normalized = normalizeFunctionBody(fn)
949
+ return crypto.createHash('sha256').update(normalized).digest('hex')
950
+ }
951
+
952
+ /** Return all function-like nodes (function declarations, function expressions, arrow functions, methods) from a SourceFile that are worth comparing:
953
+ * - At least MIN_LINES lines in their body, counted inclusively from the body start line to its end line
954
+ * - Not named like a test helper (describe/it/test/beforeEach/afterEach/beforeAll/afterAll); arrow functions and function expressions are always reported as <anonymous>, so this name filter only affects function declarations and methods
955
+ * Each result carries the node, its name, and the line and column of the node start. Results are grouped by node kind, not ordered by position in the file. */
956
+ const MIN_LINES = 8 // minimum inclusive line span of a function body for it to be collected
957
+
958
+ function collectFunctions(sf: SourceFile): Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> {
959
+ const results: Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> = []
960
+
961
+ const kinds = [
962
+ SyntaxKind.FunctionDeclaration,
963
+ SyntaxKind.FunctionExpression,
964
+ SyntaxKind.ArrowFunction,
965
+ SyntaxKind.MethodDeclaration,
966
+ ] as const
967
+
968
+ for (const kind of kinds) {
969
+ for (const node of sf.getDescendantsOfKind(kind)) {
970
+ const body = (node as FunctionLikeNode).getBody()
971
+ if (!body) continue // nothing to fingerprint without a body
972
+
973
+ const start = body.getStartLineNumber()
974
+ const end = body.getEndLineNumber()
975
+ if (end - start + 1 < MIN_LINES) continue // body spans too few lines
976
+
977
+ // Resolve a display name, then skip functions named like test-framework helpers
978
+ const name = node.getKind() === SyntaxKind.FunctionDeclaration
979
+ ? (node as FunctionDeclaration).getName() ?? '<anonymous>'
980
+ : node.getKind() === SyntaxKind.MethodDeclaration
981
+ ? (node as MethodDeclaration).getName()
982
+ : '<anonymous>'
983
+
984
+ if (['describe', 'it', 'test', 'beforeEach', 'afterEach', 'beforeAll', 'afterAll'].includes(name)) continue
985
+
986
+ const pos = node.getStart()
987
+ const lineInfo = sf.getLineAndColumnAtPos(pos)
988
+
989
+ results.push({ fn: node as FunctionLikeNode, name, line: lineInfo.line, col: lineInfo.column })
990
+ }
991
+ }
992
+
993
+ return results
994
+ }
995
+
876
996
  // ---------------------------------------------------------------------------
877
997
  // Public API
878
998
  // ---------------------------------------------------------------------------
@@ -1284,5 +1404,48 @@ export function analyzeProject(targetPath: string, config?: DriftConfig): FileRe
1284
1404
  }
1285
1405
  }
1286
1406
 
1407
+ // ── Phase 8: semantic-duplication ────────────────────────────────────────
1408
+ // Build a fingerprint → [{filePath, fnName, line, col}] map across all files
1409
+ const fingerprintMap = new Map<string, Array<{ filePath: string; name: string; line: number; col: number }>>()
1410
+
1411
+ for (const sf of sourceFiles) {
1412
+ const sfPath = sf.getFilePath()
1413
+ for (const { fn, name, line, col } of collectFunctions(sf)) {
1414
+ const fp = fingerprintFunction(fn)
1415
+ if (!fingerprintMap.has(fp)) fingerprintMap.set(fp, [])
1416
+ fingerprintMap.get(fp)!.push({ filePath: sfPath, name, line, col })
1417
+ }
1418
+ }
1419
+
1420
+ // For each fingerprint with 2+ functions: report each as a duplicate of the others
1421
+ for (const [, entries] of fingerprintMap) {
1422
+ if (entries.length < 2) continue
1423
+
1424
+ for (const entry of entries) {
1425
+ const report = reportByPath.get(entry.filePath)
1426
+ if (!report) continue
1427
+
1428
+ // Build the "duplicated in" list (all other locations)
1429
+ const others = entries
1430
+ .filter(e => e !== entry)
1431
+ .map(e => {
1432
+ const rel = path.relative(targetPath, e.filePath).replace(/\\/g, '/')
1433
+ return `${rel}:${e.line} (${e.name})`
1434
+ })
1435
+ .join(', ')
1436
+
1437
+ const weight = RULE_WEIGHTS['semantic-duplication']?.weight ?? 12
1438
+ report.issues.push({
1439
+ rule: 'semantic-duplication',
1440
+ severity: 'warning',
1441
+ message: `Function '${entry.name}' is semantically identical to: ${others}`,
1442
+ line: entry.line,
1443
+ column: entry.col,
1444
+ snippet: `function ${entry.name} — duplicated in ${entries.length - 1} other location${entries.length > 2 ? 's' : ''}`,
1445
+ })
1446
+ report.score = Math.min(100, report.score + weight)
1447
+ }
1448
+ }
1449
+
1287
1450
  return reports
1288
1451
  }