@aiready/pattern-detect 0.7.9 → 0.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -434
- package/dist/chunk-GSJFORRO.mjs +504 -0
- package/dist/chunk-R2S73CVG.mjs +503 -0
- package/dist/cli.js +39 -13
- package/dist/cli.mjs +16 -2
- package/dist/index.d.mts +5 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +28 -14
- package/dist/index.mjs +5 -3
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -61,6 +61,10 @@ interface PatternSummary {
|
|
|
61
61
|
tokenCost: number;
|
|
62
62
|
}>;
|
|
63
63
|
}
|
|
64
|
+
/**
|
|
65
|
+
* Determine smart defaults based on repository size estimation
|
|
66
|
+
*/
|
|
67
|
+
declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
|
|
64
68
|
declare function analyzePatterns(options: PatternDetectOptions): Promise<{
|
|
65
69
|
results: AnalysisResult[];
|
|
66
70
|
duplicates: DuplicatePattern[];
|
|
@@ -71,4 +75,4 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
|
|
|
71
75
|
*/
|
|
72
76
|
declare function generateSummary(results: AnalysisResult[]): PatternSummary;
|
|
73
77
|
|
|
74
|
-
export { type DuplicatePattern, type PatternDetectOptions, type PatternSummary, type PatternType, analyzePatterns, detectDuplicatePatterns, generateSummary };
|
|
78
|
+
export { type DuplicatePattern, type PatternDetectOptions, type PatternSummary, type PatternType, analyzePatterns, detectDuplicatePatterns, generateSummary, getSmartDefaults };
|
package/dist/index.js
CHANGED
|
@@ -32,7 +32,8 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
analyzePatterns: () => analyzePatterns,
|
|
34
34
|
detectDuplicatePatterns: () => detectDuplicatePatterns,
|
|
35
|
-
generateSummary: () => generateSummary
|
|
35
|
+
generateSummary: () => generateSummary,
|
|
36
|
+
getSmartDefaults: () => getSmartDefaults
|
|
36
37
|
});
|
|
37
38
|
module.exports = __toCommonJS(index_exports);
|
|
38
39
|
var import_core2 = require("@aiready/core");
|
|
@@ -220,7 +221,13 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
220
221
|
let candidates = null;
|
|
221
222
|
if (approx) {
|
|
222
223
|
const counts = /* @__PURE__ */ new Map();
|
|
223
|
-
|
|
224
|
+
const block1Tokens = new Set(blockTokens[i]);
|
|
225
|
+
const block1Size = block1Tokens.size;
|
|
226
|
+
const rareTokens = blockTokens[i].filter((tok) => {
|
|
227
|
+
const blocksWithToken = invertedIndex.get(tok)?.length || 0;
|
|
228
|
+
return blocksWithToken < allBlocks.length * 0.1;
|
|
229
|
+
});
|
|
230
|
+
for (const tok of rareTokens) {
|
|
224
231
|
const ids = invertedIndex.get(tok);
|
|
225
232
|
if (!ids) continue;
|
|
226
233
|
for (const j of ids) {
|
|
@@ -229,7 +236,13 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
229
236
|
counts.set(j, (counts.get(j) || 0) + 1);
|
|
230
237
|
}
|
|
231
238
|
}
|
|
232
|
-
candidates = Array.from(counts.entries()).filter(([
|
|
239
|
+
candidates = Array.from(counts.entries()).filter(([j, shared]) => {
|
|
240
|
+
const block2Tokens = blockTokens[j];
|
|
241
|
+
const block2Size = block2Tokens.length;
|
|
242
|
+
const minSize = Math.min(block1Size, block2Size);
|
|
243
|
+
const sharedPercentage = shared / minSize;
|
|
244
|
+
return shared >= minSharedTokens && sharedPercentage >= 0.3;
|
|
245
|
+
}).sort((a, b) => b[1] - a[1]).slice(0, Math.min(maxCandidatesPerBlock, 5)).map(([j, shared]) => ({ j, shared }));
|
|
233
246
|
}
|
|
234
247
|
if (approx && candidates) {
|
|
235
248
|
for (const { j } of candidates) {
|
|
@@ -322,12 +335,12 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
322
335
|
if (userOptions.useSmartDefaults === false) {
|
|
323
336
|
return {
|
|
324
337
|
rootDir: directory,
|
|
325
|
-
minSimilarity: 0.
|
|
326
|
-
minLines:
|
|
338
|
+
minSimilarity: 0.6,
|
|
339
|
+
minLines: 8,
|
|
327
340
|
batchSize: 100,
|
|
328
341
|
approx: true,
|
|
329
|
-
minSharedTokens:
|
|
330
|
-
maxCandidatesPerBlock:
|
|
342
|
+
minSharedTokens: 12,
|
|
343
|
+
maxCandidatesPerBlock: 5,
|
|
331
344
|
streamResults: false,
|
|
332
345
|
severity: "all",
|
|
333
346
|
includeTests: false
|
|
@@ -348,12 +361,12 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
348
361
|
const { scanFiles: scanFiles2 } = await import("@aiready/core");
|
|
349
362
|
const files = await scanFiles2(scanOptions);
|
|
350
363
|
const estimatedBlocks = files.length * 3;
|
|
351
|
-
const maxCandidatesPerBlock = Math.max(
|
|
352
|
-
const minSimilarity = Math.min(0.
|
|
353
|
-
const minLines = Math.max(
|
|
354
|
-
const minSharedTokens = Math.max(
|
|
355
|
-
const batchSize = estimatedBlocks >
|
|
356
|
-
const severity = estimatedBlocks >
|
|
364
|
+
const maxCandidatesPerBlock = Math.max(3, Math.min(10, Math.floor(3e4 / estimatedBlocks)));
|
|
365
|
+
const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
|
|
366
|
+
const minLines = Math.max(6, Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3)));
|
|
367
|
+
const minSharedTokens = Math.max(10, Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3)));
|
|
368
|
+
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
369
|
+
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
357
370
|
let defaults = {
|
|
358
371
|
rootDir: directory,
|
|
359
372
|
minSimilarity,
|
|
@@ -523,5 +536,6 @@ function generateSummary(results) {
|
|
|
523
536
|
0 && (module.exports = {
|
|
524
537
|
analyzePatterns,
|
|
525
538
|
detectDuplicatePatterns,
|
|
526
|
-
generateSummary
|
|
539
|
+
generateSummary,
|
|
540
|
+
getSmartDefaults
|
|
527
541
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
analyzePatterns,
|
|
3
3
|
detectDuplicatePatterns,
|
|
4
|
-
generateSummary
|
|
5
|
-
|
|
4
|
+
generateSummary,
|
|
5
|
+
getSmartDefaults
|
|
6
|
+
} from "./chunk-GSJFORRO.mjs";
|
|
6
7
|
export {
|
|
7
8
|
analyzePatterns,
|
|
8
9
|
detectDuplicatePatterns,
|
|
9
|
-
generateSummary
|
|
10
|
+
generateSummary,
|
|
11
|
+
getSmartDefaults
|
|
10
12
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/pattern-detect",
|
|
3
|
-
"version": "0.7.9",
|
|
3
|
+
"version": "0.7.12",
|
|
4
4
|
"description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"commander": "^14.0.0",
|
|
47
47
|
"chalk": "^5.3.0",
|
|
48
|
-
"@aiready/core": "0.3.
|
|
48
|
+
"@aiready/core": "0.3.7"
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"tsup": "^8.3.5",
|