@aiready/pattern-detect 0.14.0 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3WK24ZOX.mjs +860 -0
- package/dist/chunk-EXORBAXR.mjs +887 -0
- package/dist/chunk-KC2CQMG2.mjs +858 -0
- package/dist/chunk-V5DP4FP6.mjs +876 -0
- package/dist/cli.js +44 -18
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +44 -18
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -256,7 +256,7 @@ function calculateSimilarity(a, b) {
|
|
|
256
256
|
return intersection.size / union.size;
|
|
257
257
|
}
|
|
258
258
|
async function detectDuplicatePatterns(fileContents, options) {
|
|
259
|
-
const { minSimilarity, minLines, streamResults } = options;
|
|
259
|
+
const { minSimilarity, minLines, streamResults, onProgress } = options;
|
|
260
260
|
const allBlocks = [];
|
|
261
261
|
for (const { file, content } of fileContents) {
|
|
262
262
|
const blocks = extractBlocks(file, content);
|
|
@@ -265,12 +265,33 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
265
265
|
);
|
|
266
266
|
}
|
|
267
267
|
const duplicates = [];
|
|
268
|
+
const totalBlocks = allBlocks.length;
|
|
269
|
+
let comparisons = 0;
|
|
270
|
+
const totalComparisons = totalBlocks * (totalBlocks - 1) / 2;
|
|
271
|
+
if (onProgress) {
|
|
272
|
+
onProgress(
|
|
273
|
+
0,
|
|
274
|
+
totalComparisons,
|
|
275
|
+
`Starting duplicate detection on ${totalBlocks} blocks...`
|
|
276
|
+
);
|
|
277
|
+
}
|
|
268
278
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
279
|
+
if (i % 50 === 0 && i > 0) {
|
|
280
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
281
|
+
if (onProgress) {
|
|
282
|
+
onProgress(
|
|
283
|
+
comparisons,
|
|
284
|
+
totalComparisons,
|
|
285
|
+
`Analyzing blocks (${i}/${totalBlocks})...`
|
|
286
|
+
);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
const b1 = allBlocks[i];
|
|
290
|
+
const norm1 = normalizeCode(b1.code);
|
|
269
291
|
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
270
|
-
|
|
292
|
+
comparisons++;
|
|
271
293
|
const b2 = allBlocks[j];
|
|
272
294
|
if (b1.file === b2.file) continue;
|
|
273
|
-
const norm1 = normalizeCode(b1.code);
|
|
274
295
|
const norm2 = normalizeCode(b2.code);
|
|
275
296
|
const sim = calculateSimilarity(norm1, norm2);
|
|
276
297
|
if (sim >= minSimilarity) {
|
|
@@ -306,6 +327,13 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
306
327
|
}
|
|
307
328
|
}
|
|
308
329
|
}
|
|
330
|
+
if (onProgress) {
|
|
331
|
+
onProgress(
|
|
332
|
+
totalComparisons,
|
|
333
|
+
totalComparisons,
|
|
334
|
+
`Duplicate detection complete. Found ${duplicates.length} patterns.`
|
|
335
|
+
);
|
|
336
|
+
}
|
|
309
337
|
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
310
338
|
}
|
|
311
339
|
|
|
@@ -557,7 +585,8 @@ var PatternDetectProvider = {
|
|
|
557
585
|
),
|
|
558
586
|
duplicates: results.duplicates,
|
|
559
587
|
groups: results.groups,
|
|
560
|
-
clusters: results.clusters
|
|
588
|
+
clusters: results.clusters,
|
|
589
|
+
config: results.config
|
|
561
590
|
},
|
|
562
591
|
metadata: {
|
|
563
592
|
toolName: import_core5.ToolName.PatternDetect,
|
|
@@ -615,29 +644,26 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
615
644
|
};
|
|
616
645
|
const { scanFiles: scanFiles2 } = await import("@aiready/core");
|
|
617
646
|
const files = await scanFiles2(scanOptions);
|
|
618
|
-
const
|
|
619
|
-
const maxCandidatesPerBlock = Math.max(
|
|
620
|
-
3,
|
|
621
|
-
Math.min(10, Math.floor(3e4 / estimatedBlocks))
|
|
622
|
-
);
|
|
623
|
-
const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
|
|
647
|
+
const fileCount = files.length;
|
|
648
|
+
const estimatedBlocks = fileCount * 5;
|
|
624
649
|
const minLines = Math.max(
|
|
625
650
|
6,
|
|
626
|
-
Math.min(
|
|
627
|
-
);
|
|
628
|
-
const minSharedTokens = Math.max(
|
|
629
|
-
10,
|
|
630
|
-
Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
|
|
651
|
+
Math.min(20, 6 + Math.floor(estimatedBlocks / 1e3) * 2)
|
|
631
652
|
);
|
|
653
|
+
const minSimilarity = Math.min(0.85, 0.5 + estimatedBlocks / 5e3 * 0.3);
|
|
632
654
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
633
|
-
const severity = estimatedBlocks >
|
|
655
|
+
const severity = estimatedBlocks > 3e3 ? "high" : "all";
|
|
656
|
+
const maxCandidatesPerBlock = Math.max(
|
|
657
|
+
5,
|
|
658
|
+
Math.min(100, Math.floor(1e6 / estimatedBlocks))
|
|
659
|
+
);
|
|
634
660
|
const defaults = {
|
|
635
661
|
rootDir: directory,
|
|
636
662
|
minSimilarity,
|
|
637
663
|
minLines,
|
|
638
664
|
batchSize,
|
|
639
665
|
approx: true,
|
|
640
|
-
minSharedTokens,
|
|
666
|
+
minSharedTokens: 10,
|
|
641
667
|
maxCandidatesPerBlock,
|
|
642
668
|
streamResults: false,
|
|
643
669
|
severity,
|
|
@@ -766,7 +792,7 @@ async function analyzePatterns(options) {
|
|
|
766
792
|
minClusterFiles
|
|
767
793
|
);
|
|
768
794
|
}
|
|
769
|
-
return { results, duplicates, files, groups, clusters };
|
|
795
|
+
return { results, duplicates, files, groups, clusters, config: finalOptions };
|
|
770
796
|
}
|
|
771
797
|
function generateSummary(results) {
|
|
772
798
|
const allIssues = results.flatMap((r) => r.issues);
|
package/dist/cli.mjs
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -150,6 +150,7 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
|
|
|
150
150
|
files: string[];
|
|
151
151
|
groups?: DuplicateGroup[];
|
|
152
152
|
clusters?: RefactorCluster[];
|
|
153
|
+
config: PatternDetectOptions;
|
|
153
154
|
}>;
|
|
154
155
|
/**
|
|
155
156
|
* Generate a summary of pattern analysis
|
package/dist/index.d.ts
CHANGED
|
@@ -150,6 +150,7 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
|
|
|
150
150
|
files: string[];
|
|
151
151
|
groups?: DuplicateGroup[];
|
|
152
152
|
clusters?: RefactorCluster[];
|
|
153
|
+
config: PatternDetectOptions;
|
|
153
154
|
}>;
|
|
154
155
|
/**
|
|
155
156
|
* Generate a summary of pattern analysis
|
package/dist/index.js
CHANGED
|
@@ -280,7 +280,7 @@ function calculateSimilarity(a, b) {
|
|
|
280
280
|
return intersection.size / union.size;
|
|
281
281
|
}
|
|
282
282
|
async function detectDuplicatePatterns(fileContents, options) {
|
|
283
|
-
const { minSimilarity, minLines, streamResults } = options;
|
|
283
|
+
const { minSimilarity, minLines, streamResults, onProgress } = options;
|
|
284
284
|
const allBlocks = [];
|
|
285
285
|
for (const { file, content } of fileContents) {
|
|
286
286
|
const blocks = extractBlocks(file, content);
|
|
@@ -289,12 +289,33 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
289
289
|
);
|
|
290
290
|
}
|
|
291
291
|
const duplicates = [];
|
|
292
|
+
const totalBlocks = allBlocks.length;
|
|
293
|
+
let comparisons = 0;
|
|
294
|
+
const totalComparisons = totalBlocks * (totalBlocks - 1) / 2;
|
|
295
|
+
if (onProgress) {
|
|
296
|
+
onProgress(
|
|
297
|
+
0,
|
|
298
|
+
totalComparisons,
|
|
299
|
+
`Starting duplicate detection on ${totalBlocks} blocks...`
|
|
300
|
+
);
|
|
301
|
+
}
|
|
292
302
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
303
|
+
if (i % 50 === 0 && i > 0) {
|
|
304
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
305
|
+
if (onProgress) {
|
|
306
|
+
onProgress(
|
|
307
|
+
comparisons,
|
|
308
|
+
totalComparisons,
|
|
309
|
+
`Analyzing blocks (${i}/${totalBlocks})...`
|
|
310
|
+
);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
const b1 = allBlocks[i];
|
|
314
|
+
const norm1 = normalizeCode(b1.code);
|
|
293
315
|
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
294
|
-
|
|
316
|
+
comparisons++;
|
|
295
317
|
const b2 = allBlocks[j];
|
|
296
318
|
if (b1.file === b2.file) continue;
|
|
297
|
-
const norm1 = normalizeCode(b1.code);
|
|
298
319
|
const norm2 = normalizeCode(b2.code);
|
|
299
320
|
const sim = calculateSimilarity(norm1, norm2);
|
|
300
321
|
if (sim >= minSimilarity) {
|
|
@@ -330,6 +351,13 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
330
351
|
}
|
|
331
352
|
}
|
|
332
353
|
}
|
|
354
|
+
if (onProgress) {
|
|
355
|
+
onProgress(
|
|
356
|
+
totalComparisons,
|
|
357
|
+
totalComparisons,
|
|
358
|
+
`Duplicate detection complete. Found ${duplicates.length} patterns.`
|
|
359
|
+
);
|
|
360
|
+
}
|
|
333
361
|
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
334
362
|
}
|
|
335
363
|
|
|
@@ -581,7 +609,8 @@ var PatternDetectProvider = {
|
|
|
581
609
|
),
|
|
582
610
|
duplicates: results.duplicates,
|
|
583
611
|
groups: results.groups,
|
|
584
|
-
clusters: results.clusters
|
|
612
|
+
clusters: results.clusters,
|
|
613
|
+
config: results.config
|
|
585
614
|
},
|
|
586
615
|
metadata: {
|
|
587
616
|
toolName: import_core5.ToolName.PatternDetect,
|
|
@@ -639,29 +668,26 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
639
668
|
};
|
|
640
669
|
const { scanFiles: scanFiles2 } = await import("@aiready/core");
|
|
641
670
|
const files = await scanFiles2(scanOptions);
|
|
642
|
-
const
|
|
643
|
-
const maxCandidatesPerBlock = Math.max(
|
|
644
|
-
3,
|
|
645
|
-
Math.min(10, Math.floor(3e4 / estimatedBlocks))
|
|
646
|
-
);
|
|
647
|
-
const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
|
|
671
|
+
const fileCount = files.length;
|
|
672
|
+
const estimatedBlocks = fileCount * 5;
|
|
648
673
|
const minLines = Math.max(
|
|
649
674
|
6,
|
|
650
|
-
Math.min(
|
|
651
|
-
);
|
|
652
|
-
const minSharedTokens = Math.max(
|
|
653
|
-
10,
|
|
654
|
-
Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
|
|
675
|
+
Math.min(20, 6 + Math.floor(estimatedBlocks / 1e3) * 2)
|
|
655
676
|
);
|
|
677
|
+
const minSimilarity = Math.min(0.85, 0.5 + estimatedBlocks / 5e3 * 0.3);
|
|
656
678
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
657
|
-
const severity = estimatedBlocks >
|
|
679
|
+
const severity = estimatedBlocks > 3e3 ? "high" : "all";
|
|
680
|
+
const maxCandidatesPerBlock = Math.max(
|
|
681
|
+
5,
|
|
682
|
+
Math.min(100, Math.floor(1e6 / estimatedBlocks))
|
|
683
|
+
);
|
|
658
684
|
const defaults = {
|
|
659
685
|
rootDir: directory,
|
|
660
686
|
minSimilarity,
|
|
661
687
|
minLines,
|
|
662
688
|
batchSize,
|
|
663
689
|
approx: true,
|
|
664
|
-
minSharedTokens,
|
|
690
|
+
minSharedTokens: 10,
|
|
665
691
|
maxCandidatesPerBlock,
|
|
666
692
|
streamResults: false,
|
|
667
693
|
severity,
|
|
@@ -790,7 +816,7 @@ async function analyzePatterns(options) {
|
|
|
790
816
|
minClusterFiles
|
|
791
817
|
);
|
|
792
818
|
}
|
|
793
|
-
return { results, duplicates, files, groups, clusters };
|
|
819
|
+
return { results, duplicates, files, groups, clusters, config: finalOptions };
|
|
794
820
|
}
|
|
795
821
|
function generateSummary(results) {
|
|
796
822
|
const allIssues = results.flatMap((r) => r.issues);
|
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/pattern-detect",
|
|
3
|
-
"version": "0.14.0",
|
|
3
|
+
"version": "0.14.3",
|
|
4
4
|
"description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"commander": "^14.0.0",
|
|
47
47
|
"chalk": "^5.3.0",
|
|
48
|
-
"@aiready/core": "0.21.
|
|
48
|
+
"@aiready/core": "0.21.3"
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"tsup": "^8.3.5",
|