@safetnsr/vet 1.19.1 → 1.19.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ import type { CheckResult } from '../types.js';
2
+ export declare function checkClones(cwd: string): Promise<CheckResult>;
@@ -0,0 +1,172 @@
1
+ import { join } from 'node:path';
2
+ import { createHash } from 'node:crypto';
3
+ import { walkFiles, readFile, c } from '../util.js';
4
/** File extensions (leading dot included) that count as scannable source code. */
const SOURCE_EXTS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mts', '.mjs']);
/**
 * Reports whether a path ends in one of the extensions listed in SOURCE_EXTS.
 *
 * The extension is everything from the LAST dot onwards, so a dot that sits in
 * a directory name (with no dot in the file name) yields a non-matching
 * "extension" containing a path separator.
 *
 * @param f path to test
 * @returns true when the trailing extension is a known source extension
 */
function isSourceFile(f) {
    const lastDot = f.lastIndexOf('.');
    if (lastDot === -1) {
        // No dot anywhere in the path: there is no extension to look up.
        return false;
    }
    const extension = f.slice(lastDot);
    return SOURCE_EXTS.has(extension);
}
9
/**
 * Reports whether a path looks like a test file.
 *
 * A path qualifies when any of these hold:
 *  - its name ends in `.test.<ext>` or `.spec.<ext>`, where `<ext>` is one of
 *    js, jsx, ts, tsx, mjs, mts (the same extensions SOURCE_EXTS accepts);
 *  - it contains the substring `__tests__`;
 *  - it has a `test/` or `tests/` directory segment (either slash style).
 *
 * @param f path to test
 * @returns true when the path matches one of the test-file conventions
 */
function isTestFile(f) {
    // The module extensions (.mjs / .mts) are included so that e.g.
    // `foo.test.mts` is not mistaken for production source.
    if (/\.(test|spec)\.(?:[jt]sx?|m[jt]s)$/.test(f)) {
        return true;
    }
    if (f.includes('__tests__')) {
        return true;
    }
    return /(?:^|[/\\])tests?[/\\]/.test(f);
}
12
/**
 * Reports whether a path lives under an example-like directory.
 *
 * Recognised directory segments (singular or plural, either slash style):
 * example(s), template(s), fixture(s), demo(s). The segment must be a whole
 * directory name, i.e. start the path or follow a separator, and be followed
 * by a separator.
 *
 * @param f path to test
 * @returns true when such a directory segment is present
 */
function isExampleFile(f) {
    const exampleDirPattern = /(?:^|[/\\])(?:examples?|templates?|fixtures?|demos?)[/\\]/;
    return exampleDirPattern.test(f);
}
15
+ // ── Token normalization ─────────────────────────────────────────────────────
16
+ // Strip comments, normalize whitespace, replace string and number literals with placeholders
17
+ // This makes structurally identical code match even with different literal values or formatting
18
/**
 * Reduces a code fragment to a canonical form for clone hashing.
 *
 * Steps:
 *  1. String literals (single-quoted, double-quoted, backtick; single-line
 *     only) become the placeholder `"S"`; `//` and block comments are removed.
 *  2. Every run of whitespace collapses to one space and the ends are trimmed.
 *  3. Numeric literals become `0`.
 *
 * Identifiers are left untouched, so fragments must use the same names to
 * normalize to the same text.
 *
 * @param code raw source text
 * @returns the normalized text
 */
function normalizeTokens(code) {
    // Strings and comments are matched by ONE alternation scanned left to
    // right, so whichever construct opens first wins. Stripping comments in a
    // separate earlier pass would treat a `//` or `/*` that sits inside a
    // string (e.g. "http://host") as a comment and truncate the line.
    const literalOrComment = /(["'`])(?:(?!\1|\\).|\\.)*\1|\/\/.*$|\/\*[\s\S]*?\*\//gm;
    // Capture group 1 (the opening quote) is only set for the string branch.
    let normalized = code.replace(literalOrComment, (_match, quote) => (quote ? '"S"' : ''));
    // Normalize whitespace
    normalized = normalized.replace(/\s+/g, ' ').trim();
    // Normalize numbers (replace with placeholder)
    normalized = normalized.replace(/\b\d+\.?\d*\b/g, '0');
    return normalized;
}
31
// minimum lines for a clone to matter
// NOTE(review): nothing in the visible part of this file reads this constant — confirm whether it is still needed.
const MIN_CHUNK_LINES = 6;
/**
 * Slides a fixed-height window over a file, one line at a time, and returns a
 * hashed chunk for every window worth comparing.
 *
 * A window is dropped when fewer than half of its lines are "meaningful"
 * (non-blank, not starting with `import ` / `export `, and not a lone `{`,
 * `}` or `);`), or when its normalized text is shorter than 40 characters.
 *
 * @param file path stored on each chunk
 * @param content full text of the file
 * @param windowSize number of lines per window
 * @returns chunks of the form { file, startLine, endLine, hash, raw }, with
 *          1-based inclusive line numbers and an MD5 hex digest of the
 *          normalized window text
 */
function extractChunks(file, content, windowSize) {
    const allLines = content.split('\n');
    const lastStart = allLines.length - windowSize;
    // Predicate for lines that carry real code rather than structure or imports.
    const isMeaningful = (line) => {
        const text = line.trim();
        if (!text) {
            return false;
        }
        if (text.startsWith('import ') || text.startsWith('export ')) {
            return false;
        }
        return text !== '{' && text !== '}' && text !== ');';
    };
    const result = [];
    for (let start = 0; start <= lastStart; start++) {
        const slice = allLines.slice(start, start + windowSize);
        // Skip windows that are mostly empty or imports
        const meaningfulCount = slice.filter(isMeaningful).length;
        if (meaningfulCount < windowSize * 0.5) {
            continue;
        }
        const raw = slice.join('\n');
        const normalized = normalizeTokens(raw);
        if (normalized.length < 40) {
            // too short after normalization
            continue;
        }
        const digest = createHash('md5').update(normalized).digest('hex');
        result.push({
            file,
            startLine: start + 1,
            endLine: start + windowSize,
            hash: digest,
            raw,
        });
    }
    return result;
}
58
/**
 * Detects blocks of code duplicated across different source files and turns
 * the amount of duplication into a score.
 *
 * Pipeline:
 *  1. Collect source files under `cwd`, excluding test and example files.
 *     With fewer than two files the check returns a perfect score at once.
 *  2. Hash every 10-line window of every file (see extractChunks) and bucket
 *     the windows by hash.
 *  3. Keep buckets that span at least two files, with the earliest window per
 *     file as that file's representative.
 *  4. Walk the groups (most files first) and drop a group when more than half
 *     of its first location's lines were already covered by a kept group.
 *  5. Report up to 10 groups as issues, naming at most 3 locations each.
 *  6. Score = max(25, round(100 - duplicationRate * 400)), where
 *     duplicationRate = covered lines / total lines of all readable files.
 *
 * @param cwd project root to scan
 * @returns a result object { name, score, maxScore, summary, issues }
 */
export async function checkClones(cwd) {
    const allFiles = walkFiles(cwd);
    const sourceFiles = allFiles.filter(f => isSourceFile(f) && !isTestFile(f) && !isExampleFile(f));
    if (sourceFiles.length < 2) {
        return { name: 'clones', score: 100, maxScore: 100, summary: 'too few files', issues: [] };
    }
    const t0 = Date.now();
    const issues = [];
    // Single window size — use the largest to reduce noise
    const WINDOW_SIZE = 10;
    const hashMap = new Map();
    // Line total for the duplication rate, gathered while each file's content
    // is already in hand so no file has to be read a second time.
    let totalSourceLines = 0;
    for (const file of sourceFiles) {
        const content = readFile(join(cwd, file));
        if (!content)
            continue;
        totalSourceLines += content.split('\n').length;
        for (const chunk of extractChunks(file, content, WINDOW_SIZE)) {
            const bucket = hashMap.get(chunk.hash);
            if (bucket) {
                bucket.push(chunk);
            }
            else {
                hashMap.set(chunk.hash, [chunk]);
            }
        }
    }
    // Find cross-file duplicates, pick ONE representative per file per clone
    const allCloneGroups = [];
    for (const [hash, chunks] of hashMap) {
        if (chunks.length < 2)
            continue;
        // Group by file, pick earliest occurrence per file
        const byFile = new Map();
        for (const chunk of chunks) {
            const existing = byFile.get(chunk.file);
            if (!existing || chunk.startLine < existing.startLine) {
                byFile.set(chunk.file, chunk);
            }
        }
        // Cross-file only
        if (byFile.size < 2)
            continue;
        const reps = [...byFile.values()];
        allCloneGroups.push({
            hash,
            locations: reps.map(r => ({ file: r.file, startLine: r.startLine, endLine: r.endLine })),
            lineCount: WINDOW_SIZE,
            sample: reps[0].raw.slice(0, 200),
        });
    }
    // Suppress near-duplicate reports produced by overlapping windows.
    // Sort by number of files (more widespread clones first), then by the
    // start line of each group's first location.
    allCloneGroups.sort((a, b) => b.locations.length - a.locations.length || a.locations[0].startLine - b.locations[0].startLine);
    const coveredRanges = new Map(); // file -> Set of line numbers already reported
    const filteredGroups = [];
    for (const group of allCloneGroups) {
        // Only the group's first location is checked against earlier coverage.
        const firstLoc = group.locations[0];
        const covered = coveredRanges.get(firstLoc.file);
        if (covered) {
            let overlapCount = 0;
            for (let line = firstLoc.startLine; line <= firstLoc.endLine; line++) {
                if (covered.has(line))
                    overlapCount++;
            }
            // Skip if >50% of lines already reported
            if (overlapCount > group.lineCount * 0.5)
                continue;
        }
        filteredGroups.push(group);
        // Mark all locations as covered
        for (const loc of group.locations) {
            let lineSet = coveredRanges.get(loc.file);
            if (!lineSet) {
                lineSet = new Set();
                coveredRanges.set(loc.file, lineSet);
            }
            for (let line = loc.startLine; line <= loc.endLine; line++) {
                lineSet.add(line);
            }
        }
    }
    // Report top clones
    const topClones = filteredGroups.slice(0, 10);
    for (const clone of topClones) {
        const locs = clone.locations.slice(0, 3);
        const locStr = locs.map(l => `${l.file}:${l.startLine}`).join(', ');
        issues.push({
            // lineCount is always WINDOW_SIZE (10) here, so this currently
            // always evaluates to 'info'.
            severity: clone.lineCount >= 15 ? 'warning' : 'info',
            message: `duplicated ${clone.lineCount}-line block across ${clone.locations.length} files: ${locStr}`,
            file: clone.locations[0].file,
            line: clone.locations[0].startLine,
            fixable: true,
            fixHint: 'extract into a shared function or module',
        });
    }
    const elapsed = Date.now() - t0;
    // ── Scoring ───────────────────────────────────────────────────────────────
    // Total duplicated lines as % of codebase.
    // Count unique duplicated lines from covered ranges
    let duplicatedLines = 0;
    for (const lines of coveredRanges.values()) {
        duplicatedLines += lines.size;
    }
    const duplicationRate = totalSourceLines > 0 ? duplicatedLines / totalSourceLines : 0;
    // Every 1% of duplicated lines costs 4 points; the score never drops below 25.
    const score = Math.max(25, Math.round(100 - duplicationRate * 400));
    const parts = [];
    parts.push(`${sourceFiles.length} files scanned in ${elapsed}ms`);
    if (filteredGroups.length > 0) {
        parts.push(c.yellow + `${filteredGroups.length} clone groups` + c.reset);
        parts.push(`${duplicatedLines} duplicated lines (${(duplicationRate * 100).toFixed(1)}%)`);
    }
    else {
        parts.push('no cross-file clones detected');
    }
    return { name: 'clones', score, maxScore: 100, summary: parts.join(', '), issues };
}
package/dist/cli.js CHANGED
@@ -18,6 +18,7 @@ import { checkAIReady } from './checks/aiready.js';
18
18
  import { checkDeep } from './checks/deep.js';
19
19
  import { checkSemantic } from './checks/semantic.js';
20
20
  import { checkHotspots } from './checks/hotspots.js';
21
+ import { checkClones } from './checks/clones.js';
21
22
  import { checkReceipt, runReceiptCommand } from './checks/receipt.js';
22
23
  import { checkMemory } from './checks/memory.js';
23
24
  import { checkVerify } from './checks/verify.js';
@@ -326,7 +327,7 @@ async function runChecks() {
326
327
  }
327
328
  }
328
329
  // Run ALL independent checks in parallel
329
- const [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, integrityResult, readyResult, debtResult, depsResult, receiptResult, compactResult, subsidyResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, bloatResult, guardResult, explainResult, architectureResult, aireadyResult, deepResult, semanticResult, hotspotsResult,] = await Promise.all([
330
+ const [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, integrityResult, readyResult, debtResult, depsResult, receiptResult, compactResult, subsidyResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, bloatResult, guardResult, explainResult, architectureResult, aireadyResult, deepResult, semanticResult, hotspotsResult, clonesResult,] = await Promise.all([
330
331
  withTimeout('scan', () => checkScan(cwd)),
331
332
  withTimeout('secrets', () => checkSecrets(cwd)),
332
333
  withTimeout('config', () => checkConfig(cwd, ignore)),
@@ -353,6 +354,7 @@ async function runChecks() {
353
354
  withTimeout('deep', () => checkDeep(cwd), 60_000),
354
355
  withTimeout('semantic', () => checkSemantic(cwd), 60_000),
355
356
  withTimeout('hotspots', () => checkHotspots(cwd), 30_000),
357
+ withTimeout('clones', () => checkClones(cwd), 60_000),
356
358
  ]);
357
359
  // Git-dependent checks (diff + history) — parallel with each other
358
360
  const [diffResult, historyResult] = await Promise.all([
@@ -364,7 +366,7 @@ async function runChecks() {
364
366
  return score(cwd, {
365
367
  security: [scanResult, secretsResult, configResult, modelsResult, owaspResult, permissionsResult, subsidyResult, guardResult],
366
368
  integrity: [diffResult, integrityResult, receiptResult, compactResult, memoryResult, verifyResult, testsResult, loopResult, completenessResult, explainResult],
367
- debt: [readyResult, historyResult, debtResult, bloatResult],
369
+ debt: [readyResult, historyResult, debtResult, bloatResult, clonesResult],
368
370
  deps: [depsResult],
369
371
  architecture: [architectureResult],
370
372
  aiready: [aireadyResult, deepResult, semanticResult],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@safetnsr/vet",
3
- "version": "1.19.1",
3
+ "version": "1.19.2",
4
4
  "description": "vet your AI-generated code — one command, one score card, one letter grade",
5
5
  "type": "module",
6
6
  "bin": {