@eduardbar/drift 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/analyzer.js +142 -0
- package/package.json +1 -1
- package/src/analyzer.ts +163 -0
package/CHANGELOG.md
CHANGED
|
@@ -18,6 +18,18 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|
|
18
18
|
|
|
19
19
|
---
|
|
20
20
|
|
|
21
|
+
## [0.8.0] - 2026-02-24
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- `semantic-duplication` rule — Type-2 AST clone detection via SHA-256 fingerprinting
|
|
25
|
+
- Normalizes parameter names, local variable names, and literals before hashing — detects identical logic with different variable names
|
|
26
|
+
- Runs cross-file across the entire project; reports each duplicate pointing to all other locations
|
|
27
|
+
- Minimum threshold: functions with ≥ 8 body lines (reduces noise from trivial helpers)
|
|
28
|
+
- Skips test framework helpers (describe, it, test, beforeEach, afterEach, beforeAll, afterAll)
|
|
29
|
+
- RULE_WEIGHTS entry: severity `warning`, weight `12`
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
21
33
|
## [0.7.0] - 2026-02-24
|
|
22
34
|
|
|
23
35
|
### Added
|
package/dist/analyzer.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as fs from 'node:fs';
|
|
2
|
+
import * as crypto from 'node:crypto';
|
|
2
3
|
import * as path from 'node:path';
|
|
3
4
|
import { Project, SyntaxKind, } from 'ts-morph';
|
|
4
5
|
// Rules and their drift score weight
|
|
@@ -34,6 +35,8 @@ export const RULE_WEIGHTS = {
|
|
|
34
35
|
'inconsistent-error-handling': { severity: 'warning', weight: 8 },
|
|
35
36
|
'unnecessary-abstraction': { severity: 'warning', weight: 7 },
|
|
36
37
|
'naming-inconsistency': { severity: 'warning', weight: 6 },
|
|
38
|
+
// Phase 8: semantic duplication
|
|
39
|
+
'semantic-duplication': { severity: 'warning', weight: 12 },
|
|
37
40
|
};
|
|
38
41
|
function hasIgnoreComment(file, line) {
|
|
39
42
|
const lines = file.getFullText().split('\n');
|
|
@@ -786,6 +789,105 @@ function calculateScore(issues) {
|
|
|
786
789
|
}
|
|
787
790
|
return Math.min(100, raw);
|
|
788
791
|
}
|
|
792
|
+
/**
 * Normalize a function body to a canonical string (Type-2 clone detection).
 *
 * Names bound inside the function are replaced with positional tokens and
 * literals with type tokens, so that two functions with identical logic but
 * different identifiers serialize to the same string:
 *   - the function's own parameters become `P<index>` (a parameter named `_`
 *     is left untouched);
 *   - every other binding found below the function node — variable
 *     declarations, destructured elements and parameters of nested
 *     functions — becomes `V<n>`, numbered in traversal order;
 *   - numeric literals become `NL`, string / no-substitution template
 *     literals `SL`, and `true` / `false` / `null` become `TRUE` / `FALSE` /
 *     `NULL`.
 *
 * Identifiers that are *member names* (`obj.name`, `{ name: value }`, the
 * `key` in `{ key: alias }` destructuring) are never substituted: they name a
 * property, not a local binding, even when a local happens to share the name.
 *
 * Substitution is purely name-based; lexical scoping / shadowing is not
 * modelled.
 *
 * @param fn ts-morph function-like node (declaration, expression, arrow or method).
 * @returns The canonical serialization of the body, or `''` when `fn` has no body.
 */
function normalizeFunctionBody(fn) {
    // Substitution map: local name → canonical token
    const subst = new Map();
    // Own parameters first, keyed by position
    for (const [i, param] of fn.getParameters().entries()) {
        const name = param.getName();
        if (name && name !== '_')
            subst.set(name, `P${i}`);
    }
    // Every remaining binding declared anywhere below the function node
    const bindingKinds = new Set([
        SyntaxKind.VariableDeclaration,
        SyntaxKind.BindingElement,
        SyntaxKind.Parameter,
    ]);
    let varIdx = 0;
    fn.forEachDescendant(node => {
        if (!bindingKinds.has(node.getKind()))
            return;
        const nameNode = node.getNameNode();
        // A binding pattern carries no name itself; its BindingElement
        // descendants are visited individually by this same traversal.
        if (nameNode.getKind() !== SyntaxKind.Identifier)
            return;
        const name = nameNode.getText();
        // Same placeholder rule as for the function's own parameters above
        if (node.getKind() === SyntaxKind.Parameter && name === '_')
            return;
        if (!subst.has(name))
            subst.set(name, `V${varIdx++}`);
    });
    /** True when `node` is the member-name part of its parent rather than a reference to a binding. */
    function isPropertyName(node) {
        const parent = node.getParent();
        if (!parent)
            return false;
        switch (parent.getKind()) {
            case SyntaxKind.PropertyAccessExpression:
            case SyntaxKind.PropertyAssignment:
                return parent.getNameNode().compilerNode === node.compilerNode;
            case SyntaxKind.BindingElement: {
                const prop = parent.getPropertyNameNode();
                return prop !== undefined && prop.compilerNode === node.compilerNode;
            }
            default:
                return false;
        }
    }
    /** Serialize `node` as `Kind(child|child|...)`, applying the substitutions. */
    function serializeNode(node) {
        const kind = node.getKindName();
        switch (node.getKind()) {
            case SyntaxKind.Identifier: {
                const text = node.getText();
                if (isPropertyName(node))
                    return text;
                return subst.get(text) ?? text; // external refs (Math, console) kept as-is
            }
            case SyntaxKind.NumericLiteral:
                return 'NL';
            case SyntaxKind.StringLiteral:
            case SyntaxKind.NoSubstitutionTemplateLiteral:
                return 'SL';
            case SyntaxKind.TrueKeyword:
                return 'TRUE';
            case SyntaxKind.FalseKeyword:
                return 'FALSE';
            case SyntaxKind.NullKeyword:
                return 'NULL';
        }
        const children = node.getChildren();
        if (children.length === 0)
            return kind;
        const childStr = children.map(serializeNode).join('|');
        return `${kind}(${childStr})`;
    }
    const body = fn.getBody();
    if (!body)
        return '';
    return serializeNode(body);
}
|
|
849
|
+
/**
 * Fingerprint a function by hashing its normalized body.
 *
 * @param fn function-like node accepted by `normalizeFunctionBody`.
 * @returns Hex-encoded SHA-256 digest of the normalized body string.
 */
function fingerprintFunction(fn) {
    const hasher = crypto.createHash('sha256');
    hasher.update(normalizeFunctionBody(fn));
    return hasher.digest('hex');
}
|
|
854
|
+
/** Minimum number of lines a function body must span to be compared. */
const MIN_LINES = 8;
/**
 * Return all function-like nodes from a SourceFile that are worth comparing:
 *  - the body spans at least MIN_LINES lines;
 *  - the function is not a test-framework callback, i.e. it is not passed
 *    directly to a call rooted at describe / it / test / beforeEach /
 *    afterEach / beforeAll / afterAll (so `it.each(...)(...)` and
 *    `describe.only(...)` count too), and it is not itself named like one of
 *    those helpers.
 *
 * The reported name is the function's own name when it has one, otherwise the
 * name of the variable or property it is directly assigned to, otherwise
 * `'<anonymous>'`.
 *
 * @param sf ts-morph SourceFile to scan.
 * @returns One `{ fn, name, line, col }` record per retained function, where
 *          `line` / `col` come from `sf.getLineAndColumnAtPos` at the node start.
 */
function collectFunctions(sf) {
    const testHelpers = new Set(['describe', 'it', 'test', 'beforeEach', 'afterEach', 'beforeAll', 'afterAll']);
    const results = [];
    const kinds = [
        SyntaxKind.FunctionDeclaration,
        SyntaxKind.FunctionExpression,
        SyntaxKind.ArrowFunction,
        SyntaxKind.MethodDeclaration,
    ];
    for (const kind of kinds) {
        for (const node of sf.getDescendantsOfKind(kind)) {
            const body = node.getBody();
            if (!body)
                continue;
            const start = body.getStartLineNumber();
            const end = body.getEndLineNumber();
            if (end - start + 1 < MIN_LINES)
                continue;
            const parent = node.getParent();
            // Skip test-framework callbacks: these are anonymous functions handed
            // to describe(...) / it(...) / ..., so their own name says nothing —
            // the enclosing call's callee is what identifies them.
            if (parent?.getKind() === SyntaxKind.CallExpression) {
                const callee = parent.getExpression().getText();
                const root = /^[A-Za-z_$][\w$]*/.exec(callee)?.[0];
                if (root !== undefined && testHelpers.has(root))
                    continue;
            }
            // Arrow functions have no name of their own
            const ownName = node.getKind() === SyntaxKind.ArrowFunction ? undefined : node.getName();
            const parentKind = parent?.getKind();
            const boundName = parentKind === SyntaxKind.VariableDeclaration || parentKind === SyntaxKind.PropertyAssignment
                ? parent.getName()
                : undefined;
            const name = ownName ?? boundName ?? '<anonymous>';
            // Functions that are themselves named like a test helper are skipped as well
            if (testHelpers.has(name))
                continue;
            const lineInfo = sf.getLineAndColumnAtPos(node.getStart());
            results.push({ fn: node, name, line: lineInfo.line, col: lineInfo.column });
        }
    }
    return results;
}
|
|
789
891
|
// ---------------------------------------------------------------------------
|
|
790
892
|
// Public API
|
|
791
893
|
// ---------------------------------------------------------------------------
|
|
@@ -1162,6 +1264,46 @@ export function analyzeProject(targetPath, config) {
|
|
|
1162
1264
|
}
|
|
1163
1265
|
}
|
|
1164
1266
|
}
|
|
1267
|
+
// ── Phase 8: semantic-duplication ────────────────────────────────────────
|
|
1268
|
+
// Build a fingerprint → [{filePath, fnName, line, col}] map across all files
|
|
1269
|
+
const fingerprintMap = new Map();
|
|
1270
|
+
for (const sf of sourceFiles) {
|
|
1271
|
+
const sfPath = sf.getFilePath();
|
|
1272
|
+
for (const { fn, name, line, col } of collectFunctions(sf)) {
|
|
1273
|
+
const fp = fingerprintFunction(fn);
|
|
1274
|
+
if (!fingerprintMap.has(fp))
|
|
1275
|
+
fingerprintMap.set(fp, []);
|
|
1276
|
+
fingerprintMap.get(fp).push({ filePath: sfPath, name, line, col });
|
|
1277
|
+
}
|
|
1278
|
+
}
|
|
1279
|
+
// For each fingerprint with 2+ functions: report each as a duplicate of the others
|
|
1280
|
+
for (const [, entries] of fingerprintMap) {
|
|
1281
|
+
if (entries.length < 2)
|
|
1282
|
+
continue;
|
|
1283
|
+
for (const entry of entries) {
|
|
1284
|
+
const report = reportByPath.get(entry.filePath);
|
|
1285
|
+
if (!report)
|
|
1286
|
+
continue;
|
|
1287
|
+
// Build the "duplicated in" list (all other locations)
|
|
1288
|
+
const others = entries
|
|
1289
|
+
.filter(e => e !== entry)
|
|
1290
|
+
.map(e => {
|
|
1291
|
+
const rel = path.relative(targetPath, e.filePath).replace(/\\/g, '/');
|
|
1292
|
+
return `${rel}:${e.line} (${e.name})`;
|
|
1293
|
+
})
|
|
1294
|
+
.join(', ');
|
|
1295
|
+
const weight = RULE_WEIGHTS['semantic-duplication']?.weight ?? 12;
|
|
1296
|
+
report.issues.push({
|
|
1297
|
+
rule: 'semantic-duplication',
|
|
1298
|
+
severity: 'warning',
|
|
1299
|
+
message: `Function '${entry.name}' is semantically identical to: ${others}`,
|
|
1300
|
+
line: entry.line,
|
|
1301
|
+
column: entry.col,
|
|
1302
|
+
snippet: `function ${entry.name} — duplicated in ${entries.length - 1} other location${entries.length > 2 ? 's' : ''}`,
|
|
1303
|
+
});
|
|
1304
|
+
report.score = Math.min(100, report.score + weight);
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1165
1307
|
return reports;
|
|
1166
1308
|
}
|
|
1167
1309
|
//# sourceMappingURL=analyzer.js.map
|
package/package.json
CHANGED
package/src/analyzer.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as fs from 'node:fs'
|
|
2
|
+
import * as crypto from 'node:crypto'
|
|
2
3
|
import * as path from 'node:path'
|
|
3
4
|
import {
|
|
4
5
|
Project,
|
|
@@ -45,6 +46,8 @@ export const RULE_WEIGHTS: Record<string, { severity: DriftIssue['severity']; we
|
|
|
45
46
|
'inconsistent-error-handling': { severity: 'warning', weight: 8 },
|
|
46
47
|
'unnecessary-abstraction': { severity: 'warning', weight: 7 },
|
|
47
48
|
'naming-inconsistency': { severity: 'warning', weight: 6 },
|
|
49
|
+
// Phase 8: semantic duplication
|
|
50
|
+
'semantic-duplication': { severity: 'warning', weight: 12 },
|
|
48
51
|
}
|
|
49
52
|
|
|
50
53
|
type FunctionLike = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration
|
|
@@ -873,6 +876,123 @@ function calculateScore(issues: DriftIssue[]): number {
|
|
|
873
876
|
return Math.min(100, raw)
|
|
874
877
|
}
|
|
875
878
|
|
|
879
|
+
// ---------------------------------------------------------------------------
|
|
880
|
+
// Phase 8: Semantic duplication — AST fingerprinting helpers
|
|
881
|
+
// ---------------------------------------------------------------------------
|
|
882
|
+
|
|
883
|
+
// NOTE(review): this union lists the same four node types as the `FunctionLike` alias declared earlier in this file — consider reusing that alias instead of keeping two copies in sync.
type FunctionLikeNode = FunctionDeclaration | ArrowFunction | FunctionExpression | MethodDeclaration
|
|
884
|
+
|
|
885
|
+
/**
 * Normalize a function body to a canonical string (Type-2 clone detection).
 *
 * Names bound inside the function are replaced with positional tokens and
 * literals with type tokens, so that two functions with identical logic but
 * different identifiers serialize to the same string:
 *   - the function's own parameters become `P<index>` (a parameter named `_`
 *     is left untouched);
 *   - every other binding found below the function node — variable
 *     declarations, destructured elements and parameters of nested
 *     functions — becomes `V<n>`, numbered in traversal order;
 *   - numeric literals become `NL`, string / no-substitution template
 *     literals `SL`, and `true` / `false` / `null` become `TRUE` / `FALSE` /
 *     `NULL`.
 *
 * Identifiers that are *member names* (`obj.name`, `{ name: value }`, the
 * `key` in `{ key: alias }` destructuring) are never substituted: they name a
 * property, not a local binding, even when a local happens to share the name.
 *
 * Substitution is purely name-based; lexical scoping / shadowing is not
 * modelled.
 *
 * @param fn ts-morph function-like node (declaration, expression, arrow or method).
 * @returns The canonical serialization of the body, or `''` when `fn` has no body.
 */
function normalizeFunctionBody(fn: FunctionLikeNode): string {
  type BindingNode =
    | import('ts-morph').VariableDeclaration
    | import('ts-morph').BindingElement
    | import('ts-morph').ParameterDeclaration

  // Substitution map: local name → canonical token
  const subst = new Map<string, string>()

  // Own parameters first, keyed by position
  for (const [i, param] of fn.getParameters().entries()) {
    const name = param.getName()
    if (name && name !== '_') subst.set(name, `P${i}`)
  }

  // Every remaining binding declared anywhere below the function node
  const bindingKinds = new Set<SyntaxKind>([
    SyntaxKind.VariableDeclaration,
    SyntaxKind.BindingElement,
    SyntaxKind.Parameter,
  ])
  let varIdx = 0
  fn.forEachDescendant(node => {
    if (!bindingKinds.has(node.getKind())) return

    const nameNode = (node as BindingNode).getNameNode()
    // A binding pattern carries no name itself; its BindingElement
    // descendants are visited individually by this same traversal.
    if (nameNode.getKind() !== SyntaxKind.Identifier) return

    const name = nameNode.getText()
    // Same placeholder rule as for the function's own parameters above
    if (node.getKind() === SyntaxKind.Parameter && name === '_') return
    if (!subst.has(name)) subst.set(name, `V${varIdx++}`)
  })

  /** True when `node` is the member-name part of its parent rather than a reference to a binding. */
  function isPropertyName(node: Node): boolean {
    const parent = node.getParent()
    if (!parent) return false

    switch (parent.getKind()) {
      case SyntaxKind.PropertyAccessExpression:
        return (parent as import('ts-morph').PropertyAccessExpression).getNameNode().compilerNode === node.compilerNode
      case SyntaxKind.PropertyAssignment:
        return (parent as import('ts-morph').PropertyAssignment).getNameNode().compilerNode === node.compilerNode
      case SyntaxKind.BindingElement: {
        const prop = (parent as import('ts-morph').BindingElement).getPropertyNameNode()
        return prop !== undefined && prop.compilerNode === node.compilerNode
      }
      default:
        return false
    }
  }

  /** Serialize `node` as `Kind(child|child|...)`, applying the substitutions. */
  function serializeNode(node: Node): string {
    const kind = node.getKindName()

    switch (node.getKind()) {
      case SyntaxKind.Identifier: {
        const text = node.getText()
        if (isPropertyName(node)) return text
        return subst.get(text) ?? text // external refs (Math, console) kept as-is
      }
      case SyntaxKind.NumericLiteral:
        return 'NL'
      case SyntaxKind.StringLiteral:
      case SyntaxKind.NoSubstitutionTemplateLiteral:
        return 'SL'
      case SyntaxKind.TrueKeyword:
        return 'TRUE'
      case SyntaxKind.FalseKeyword:
        return 'FALSE'
      case SyntaxKind.NullKeyword:
        return 'NULL'
    }

    const children = node.getChildren()
    if (children.length === 0) return kind

    const childStr = children.map(serializeNode).join('|')
    return `${kind}(${childStr})`
  }

  const body = fn.getBody()
  if (!body) return ''
  return serializeNode(body)
}
|
|
945
|
+
|
|
946
|
+
/**
 * Fingerprint a function by hashing its normalized body.
 *
 * @param fn function-like node accepted by `normalizeFunctionBody`.
 * @returns Hex-encoded SHA-256 digest of the normalized body string.
 */
function fingerprintFunction(fn: FunctionLikeNode): string {
  const hasher = crypto.createHash('sha256')
  hasher.update(normalizeFunctionBody(fn))
  return hasher.digest('hex')
}
|
|
951
|
+
|
|
952
|
+
/** Minimum number of lines a function body must span to be compared. */
const MIN_LINES = 8

/**
 * Return all function-like nodes from a SourceFile that are worth comparing:
 *  - the body spans at least MIN_LINES lines;
 *  - the function is not a test-framework callback, i.e. it is not passed
 *    directly to a call rooted at describe / it / test / beforeEach /
 *    afterEach / beforeAll / afterAll (so `it.each(...)(...)` and
 *    `describe.only(...)` count too), and it is not itself named like one of
 *    those helpers.
 *
 * The reported name is the function's own name when it has one, otherwise the
 * name of the variable or property it is directly assigned to, otherwise
 * `'<anonymous>'`.
 *
 * @param sf ts-morph SourceFile to scan.
 * @returns One `{ fn, name, line, col }` record per retained function, where
 *          `line` / `col` come from `sf.getLineAndColumnAtPos` at the node start.
 */
function collectFunctions(sf: SourceFile): Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> {
  const testHelpers = new Set(['describe', 'it', 'test', 'beforeEach', 'afterEach', 'beforeAll', 'afterAll'])
  const results: Array<{ fn: FunctionLikeNode; name: string; line: number; col: number }> = []

  const kinds = [
    SyntaxKind.FunctionDeclaration,
    SyntaxKind.FunctionExpression,
    SyntaxKind.ArrowFunction,
    SyntaxKind.MethodDeclaration,
  ] as const

  for (const kind of kinds) {
    for (const node of sf.getDescendantsOfKind(kind)) {
      const body = (node as FunctionLikeNode).getBody()
      if (!body) continue

      const start = body.getStartLineNumber()
      const end = body.getEndLineNumber()
      if (end - start + 1 < MIN_LINES) continue

      const parent = node.getParent()

      // Skip test-framework callbacks: these are anonymous functions handed
      // to describe(...) / it(...) / ..., so their own name says nothing —
      // the enclosing call's callee is what identifies them.
      if (parent?.getKind() === SyntaxKind.CallExpression) {
        const callee = (parent as import('ts-morph').CallExpression).getExpression().getText()
        const root = /^[A-Za-z_$][\w$]*/.exec(callee)?.[0]
        if (root !== undefined && testHelpers.has(root)) continue
      }

      // Arrow functions have no name of their own
      const ownName = node.getKind() === SyntaxKind.ArrowFunction
        ? undefined
        : (node as FunctionDeclaration | FunctionExpression | MethodDeclaration).getName()
      const parentKind = parent?.getKind()
      const boundName = parentKind === SyntaxKind.VariableDeclaration || parentKind === SyntaxKind.PropertyAssignment
        ? (parent as import('ts-morph').VariableDeclaration | import('ts-morph').PropertyAssignment).getName()
        : undefined
      const name = ownName ?? boundName ?? '<anonymous>'

      // Functions that are themselves named like a test helper are skipped as well
      if (testHelpers.has(name)) continue

      const lineInfo = sf.getLineAndColumnAtPos(node.getStart())

      results.push({ fn: node as FunctionLikeNode, name, line: lineInfo.line, col: lineInfo.column })
    }
  }

  return results
}
|
|
995
|
+
|
|
876
996
|
// ---------------------------------------------------------------------------
|
|
877
997
|
// Public API
|
|
878
998
|
// ---------------------------------------------------------------------------
|
|
@@ -1284,5 +1404,48 @@ export function analyzeProject(targetPath: string, config?: DriftConfig): FileRe
|
|
|
1284
1404
|
}
|
|
1285
1405
|
}
|
|
1286
1406
|
|
|
1407
|
+
// ── Phase 8: semantic-duplication ────────────────────────────────────────
|
|
1408
|
+
// Build a fingerprint → [{filePath, fnName, line, col}] map across all files
|
|
1409
|
+
const fingerprintMap = new Map<string, Array<{ filePath: string; name: string; line: number; col: number }>>()
|
|
1410
|
+
|
|
1411
|
+
for (const sf of sourceFiles) {
|
|
1412
|
+
const sfPath = sf.getFilePath()
|
|
1413
|
+
for (const { fn, name, line, col } of collectFunctions(sf)) {
|
|
1414
|
+
const fp = fingerprintFunction(fn)
|
|
1415
|
+
if (!fingerprintMap.has(fp)) fingerprintMap.set(fp, [])
|
|
1416
|
+
fingerprintMap.get(fp)!.push({ filePath: sfPath, name, line, col })
|
|
1417
|
+
}
|
|
1418
|
+
}
|
|
1419
|
+
|
|
1420
|
+
// For each fingerprint with 2+ functions: report each as a duplicate of the others
|
|
1421
|
+
for (const [, entries] of fingerprintMap) {
|
|
1422
|
+
if (entries.length < 2) continue
|
|
1423
|
+
|
|
1424
|
+
for (const entry of entries) {
|
|
1425
|
+
const report = reportByPath.get(entry.filePath)
|
|
1426
|
+
if (!report) continue
|
|
1427
|
+
|
|
1428
|
+
// Build the "duplicated in" list (all other locations)
|
|
1429
|
+
const others = entries
|
|
1430
|
+
.filter(e => e !== entry)
|
|
1431
|
+
.map(e => {
|
|
1432
|
+
const rel = path.relative(targetPath, e.filePath).replace(/\\/g, '/')
|
|
1433
|
+
return `${rel}:${e.line} (${e.name})`
|
|
1434
|
+
})
|
|
1435
|
+
.join(', ')
|
|
1436
|
+
|
|
1437
|
+
const weight = RULE_WEIGHTS['semantic-duplication']?.weight ?? 12
|
|
1438
|
+
report.issues.push({
|
|
1439
|
+
rule: 'semantic-duplication',
|
|
1440
|
+
severity: 'warning',
|
|
1441
|
+
message: `Function '${entry.name}' is semantically identical to: ${others}`,
|
|
1442
|
+
line: entry.line,
|
|
1443
|
+
column: entry.col,
|
|
1444
|
+
snippet: `function ${entry.name} — duplicated in ${entries.length - 1} other location${entries.length > 2 ? 's' : ''}`,
|
|
1445
|
+
})
|
|
1446
|
+
report.score = Math.min(100, report.score + weight)
|
|
1447
|
+
}
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1287
1450
|
return reports
|
|
1288
1451
|
}
|