@safetnsr/vet 1.19.1 → 1.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/checks/clones.d.ts +2 -0
- package/dist/checks/clones.js +172 -0
- package/dist/cli.js +4 -2
- package/package.json +1 -1
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { join } from 'node:path';
|
|
2
|
+
import { createHash } from 'node:crypto';
|
|
3
|
+
import { walkFiles, readFile, c } from '../util.js';
|
|
4
|
+
// File extensions (leading dot included) that the clone scan treats as source code.
const SOURCE_EXTS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mts', '.mjs']);

/**
 * Tell whether a path ends in one of the scanned source extensions.
 *
 * The extension is everything from the last `.` in the path to its end, so the
 * comparison is case-sensitive and a path without any dot is never a source file.
 *
 * @param {string} f - File path to classify.
 * @returns {boolean} True when the trailing extension is in SOURCE_EXTS.
 */
function isSourceFile(f) {
    const lastDot = f.lastIndexOf('.');
    if (lastDot < 0) {
        return false;
    }
    const extension = f.slice(lastDot);
    return SOURCE_EXTS.has(extension);
}
|
|
9
|
+
/**
 * Tell whether a path looks like a test file, so it can be left out of the scan.
 *
 * A path counts as a test when any of these holds:
 *  - its name ends in `.test.<ext>` or `.spec.<ext>`, where <ext> is js, jsx,
 *    ts, tsx, mjs or mts (the ESM variants are included because the scan also
 *    treats `.mjs` / `.mts` files as source);
 *  - it contains `__tests__` anywhere;
 *  - it has a `test` or `tests` directory segment (either `/` or `\` separators).
 *
 * @param {string} f - File path to classify.
 * @returns {boolean} True when the path matches one of the test conventions.
 */
function isTestFile(f) {
    return /\.(?:test|spec)\.(?:[jt]sx?|m[jt]s)$/.test(f)
        || f.includes('__tests__')
        || /(?:^|[/\\])tests?[/\\]/.test(f);
}
|
|
12
|
+
/**
 * Tell whether a path sits inside an example-style directory.
 *
 * Matches a directory segment named example(s), template(s), fixture(s) or
 * demo(s), delimited by `/` or `\`. The segment must be a whole directory name:
 * a file merely called `example.js`, or a directory such as `myexamples`, does
 * not match.
 *
 * @param {string} f - File path to classify.
 * @returns {boolean} True when the path contains such a directory segment.
 */
function isExampleFile(f) {
    const exampleDirPattern = /(?:^|[/\\])(?:examples?|templates?|fixtures?|demos?)[/\\]/;
    const liesInExampleDir = exampleDirPattern.test(f);
    return liesInExampleDir;
}
|
|
15
|
+
// ── Token normalization ─────────────────────────────────────────────────────
|
|
16
|
+
// Strip comments, collapse whitespace, and replace string and number literals with placeholders
|
|
17
|
+
// This makes structurally identical code match even when formatting or literal values differ (identifiers are kept as-is)
|
|
18
|
+
/**
 * Canonicalize a code snippet so formatting and literal values do not affect
 * comparison.
 *
 * Steps, in order:
 *  1. Drop `//` and block comments and replace each string literal with `"S"`.
 *  2. Collapse every whitespace run to a single space and trim the ends.
 *  3. Replace standalone decimal number literals with `0`.
 *
 * Identifiers are left untouched. String literals are only recognized when
 * they open and close on the same line.
 *
 * @param {string} code - Raw source text.
 * @returns {string} The normalized text.
 */
function normalizeTokens(code) {
    // Strings and comments are matched in ONE left-to-right pass. Stripping
    // comments first would treat a `//` inside a string (e.g. "http://host")
    // as a comment and cut the line off in the middle of the literal.
    const stringOrComment = /(["'`])(?:(?!\1|\\).|\\.)*\1|\/\/.*$|\/\*[\s\S]*?\*\//gm;
    return code
        // The first capture group is only set when the string alternative matched.
        .replace(stringOrComment, (_match, quote) => (quote ? '"S"' : ''))
        .replace(/\s+/g, ' ')
        .trim()
        .replace(/\b\d+\.?\d*\b/g, '0');
}
|
|
31
|
+
const MIN_CHUNK_LINES = 6; // minimum lines for a clone to matter

/**
 * Slide a fixed-height window over a file, one line at a time, and fingerprint
 * every window that carries enough real code.
 *
 * A window is dropped when fewer than half of its lines are meaningful (blank
 * lines, lines starting with `import ` / `export `, and lone `{`, `}` or `);`
 * do not count), or when its normalized text is shorter than 40 characters.
 *
 * @param {string} file - Path recorded on every chunk.
 * @param {string} content - Full text of the file.
 * @param {number} windowSize - Number of lines per window.
 * @returns {Array<{file: string, startLine: number, endLine: number, hash: string, raw: string}>}
 *   One entry per kept window; line numbers are 1-based and inclusive, `hash`
 *   is the hex MD5 of the normalized window text, `raw` is the unmodified text.
 */
function extractChunks(file, content, windowSize) {
    const allLines = content.split('\n');
    const result = [];
    // A line is meaningful when it is more than whitespace, an import/export
    // statement start, or a lone closing/opening token.
    const isMeaningful = (line) => {
        const text = line.trim();
        if (!text) {
            return false;
        }
        if (text.startsWith('import ') || text.startsWith('export ')) {
            return false;
        }
        return text !== '{' && text !== '}' && text !== ');';
    };
    const lastStart = allLines.length - windowSize;
    for (let start = 0; start <= lastStart; start += 1) {
        const windowLines = allLines.slice(start, start + windowSize);
        const meaningfulCount = windowLines.filter(isMeaningful).length;
        if (meaningfulCount < windowSize * 0.5) {
            continue;
        }
        const raw = windowLines.join('\n');
        const normalized = normalizeTokens(raw);
        if (normalized.length < 40) {
            continue; // too short after normalization
        }
        result.push({
            file,
            startLine: start + 1,
            endLine: start + windowSize,
            hash: createHash('md5').update(normalized).digest('hex'),
            raw,
        });
    }
    return result;
}
|
|
58
|
+
/**
 * Detect blocks of code that are duplicated across different source files and
 * turn the amount of duplication into a score.
 *
 * Pipeline:
 *  1. Collect source files under `cwd`, excluding test and example files.
 *  2. Fingerprint every 10-line window of every file and bucket windows by hash.
 *  3. Keep buckets that span at least two files, with one location (the
 *     earliest) per file.
 *  4. Drop groups whose first location is more than 50% covered by groups that
 *     were already accepted.
 *  5. Report up to 10 groups as issues and derive the score from the share of
 *     lines covered by accepted groups (score floor is 25).
 *
 * Each file is read once; its line count is accumulated during the
 * fingerprinting pass instead of re-reading every file for the score.
 *
 * @param {string} cwd - Root directory to scan. Paths returned by walkFiles are
 *   joined onto it, so they are presumably relative to cwd — verify in util.js.
 * @returns {Promise<{name: string, score: number, maxScore: number, summary: string, issues: object[]}>}
 *   The check result.
 */
export async function checkClones(cwd) {
    const allFiles = walkFiles(cwd);
    const sourceFiles = allFiles.filter(f => isSourceFile(f) && !isTestFile(f) && !isExampleFile(f));
    if (sourceFiles.length < 2) {
        return { name: 'clones', score: 100, maxScore: 100, summary: 'too few files', issues: [] };
    }
    const t0 = Date.now();
    const issues = [];
    // Single window size — use the largest to reduce noise
    const WINDOW_SIZE = 10;
    const hashMap = new Map();
    // Total line count of all readable source files, gathered while chunking.
    let totalSourceLines = 0;
    for (const file of sourceFiles) {
        const content = readFile(join(cwd, file));
        // Falsy content (empty file, or whatever readFile yields on failure) is skipped.
        if (!content) {
            continue;
        }
        totalSourceLines += content.split('\n').length;
        for (const chunk of extractChunks(file, content, WINDOW_SIZE)) {
            const bucket = hashMap.get(chunk.hash);
            if (bucket) {
                bucket.push(chunk);
            }
            else {
                hashMap.set(chunk.hash, [chunk]);
            }
        }
    }
    // Find cross-file duplicates, pick ONE representative per file per clone
    const allCloneGroups = [];
    for (const [hash, chunks] of hashMap) {
        if (chunks.length < 2) {
            continue;
        }
        // Group by file, pick earliest occurrence per file
        const byFile = new Map();
        for (const chunk of chunks) {
            const existing = byFile.get(chunk.file);
            if (!existing || chunk.startLine < existing.startLine) {
                byFile.set(chunk.file, chunk);
            }
        }
        // Cross-file only
        if (byFile.size < 2) {
            continue;
        }
        const reps = [...byFile.values()];
        allCloneGroups.push({
            hash,
            locations: reps.map(r => ({ file: r.file, startLine: r.startLine, endLine: r.endLine })),
            lineCount: WINDOW_SIZE,
            sample: reps[0].raw.slice(0, 200),
        });
    }
    // Deduplicate overlapping clones.
    // Sort by number of files (more widespread clones first), then by earliest line
    allCloneGroups.sort((a, b) => b.locations.length - a.locations.length || a.locations[0].startLine - b.locations[0].startLine);
    // file -> Set of line numbers already attributed to an accepted clone group
    const coveredRanges = new Map();
    const filteredGroups = [];
    for (const group of allCloneGroups) {
        // Check if the first location is already substantially covered
        const firstLoc = group.locations[0];
        const covered = coveredRanges.get(firstLoc.file);
        if (covered) {
            let overlapCount = 0;
            for (let line = firstLoc.startLine; line <= firstLoc.endLine; line++) {
                if (covered.has(line)) {
                    overlapCount++;
                }
            }
            // Skip if >50% of lines already reported
            if (overlapCount > group.lineCount * 0.5) {
                continue;
            }
        }
        filteredGroups.push(group);
        // Mark all locations as covered
        for (const loc of group.locations) {
            if (!coveredRanges.has(loc.file)) {
                coveredRanges.set(loc.file, new Set());
            }
            const set = coveredRanges.get(loc.file);
            for (let line = loc.startLine; line <= loc.endLine; line++) {
                set.add(line);
            }
        }
    }
    // Report top clones
    const topClones = filteredGroups.slice(0, 10);
    for (const clone of topClones) {
        // The message lists at most three locations; the file count is the full total.
        const locs = clone.locations.slice(0, 3);
        const locStr = locs.map(l => `${l.file}:${l.startLine}`).join(', ');
        issues.push({
            // lineCount is always WINDOW_SIZE (10) here, so this currently always yields 'info'.
            severity: clone.lineCount >= 15 ? 'warning' : 'info',
            message: `duplicated ${clone.lineCount}-line block across ${clone.locations.length} files: ${locStr}`,
            file: clone.locations[0].file,
            line: clone.locations[0].startLine,
            fixable: true,
            fixHint: 'extract into a shared function or module',
        });
    }
    const elapsed = Date.now() - t0;
    // ── Scoring ───────────────────────────────────────────────────────────────
    // Unique duplicated lines (from covered ranges) as a share of all source lines
    let duplicatedLines = 0;
    for (const lines of coveredRanges.values()) {
        duplicatedLines += lines.size;
    }
    const duplicationRate = totalSourceLines > 0 ? duplicatedLines / totalSourceLines : 0;
    // Each 1% of duplication costs 4 points; the score never drops below 25.
    const score = Math.max(25, Math.round(100 - duplicationRate * 400));
    const parts = [];
    parts.push(`${sourceFiles.length} files scanned in ${elapsed}ms`);
    if (filteredGroups.length > 0) {
        parts.push(c.yellow + `${filteredGroups.length} clone groups` + c.reset);
        parts.push(`${duplicatedLines} duplicated lines (${(duplicationRate * 100).toFixed(1)}%)`);
    }
    else {
        parts.push('no cross-file clones detected');
    }
    return { name: 'clones', score, maxScore: 100, summary: parts.join(', '), issues };
}
|
package/dist/cli.js
CHANGED
|
@@ -18,6 +18,7 @@ import { checkAIReady } from './checks/aiready.js';
|
|
|
18
18
|
import { checkDeep } from './checks/deep.js';
|
|
19
19
|
import { checkSemantic } from './checks/semantic.js';
|
|
20
20
|
import { checkHotspots } from './checks/hotspots.js';
|
|
21
|
+
import { checkClones } from './checks/clones.js';
|
|
21
22
|
import { checkReceipt, runReceiptCommand } from './checks/receipt.js';
|
|
22
23
|
import { checkMemory } from './checks/memory.js';
|
|
23
24
|
import { checkVerify } from './checks/verify.js';
|
|
@@ -326,7 +327,7 @@ async function runChecks() {
|
|
|
326
327
|
}
|
|
327
328
|
}
|
|
328
329
|
// Run ALL independent checks in parallel
|
|
329
|
-
const [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, integrityResult, readyResult, debtResult, depsResult, receiptResult, compactResult, subsidyResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, bloatResult, guardResult, explainResult, architectureResult, aireadyResult, deepResult, semanticResult, hotspotsResult,] = await Promise.all([
|
|
330
|
+
const [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, integrityResult, readyResult, debtResult, depsResult, receiptResult, compactResult, subsidyResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, bloatResult, guardResult, explainResult, architectureResult, aireadyResult, deepResult, semanticResult, hotspotsResult, clonesResult,] = await Promise.all([
|
|
330
331
|
withTimeout('scan', () => checkScan(cwd)),
|
|
331
332
|
withTimeout('secrets', () => checkSecrets(cwd)),
|
|
332
333
|
withTimeout('config', () => checkConfig(cwd, ignore)),
|
|
@@ -353,6 +354,7 @@ async function runChecks() {
|
|
|
353
354
|
withTimeout('deep', () => checkDeep(cwd), 60_000),
|
|
354
355
|
withTimeout('semantic', () => checkSemantic(cwd), 60_000),
|
|
355
356
|
withTimeout('hotspots', () => checkHotspots(cwd), 30_000),
|
|
357
|
+
withTimeout('clones', () => checkClones(cwd), 60_000),
|
|
356
358
|
]);
|
|
357
359
|
// Git-dependent checks (diff + history) — parallel with each other
|
|
358
360
|
const [diffResult, historyResult] = await Promise.all([
|
|
@@ -364,7 +366,7 @@ async function runChecks() {
|
|
|
364
366
|
return score(cwd, {
|
|
365
367
|
security: [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, subsidyResult, guardResult],
|
|
366
368
|
integrity: [diffResult, integrityResult, receiptResult, compactResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, explainResult],
|
|
367
|
-
debt: [readyResult, historyResult, debtResult, bloatResult],
|
|
369
|
+
debt: [readyResult, historyResult, debtResult, bloatResult, clonesResult],
|
|
368
370
|
deps: [depsResult],
|
|
369
371
|
architecture: [architectureResult],
|
|
370
372
|
aiready: [aireadyResult, deepResult, semanticResult],
|