@aiready/pattern-detect 0.11.32 → 0.11.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-FWUKMJEQ.mjs +1133 -0
- package/dist/chunk-YSDOUNJJ.mjs +1142 -0
- package/dist/cli.js +31 -18
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +31 -18
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -428,7 +428,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
428
428
|
linesOfCode: block.linesOfCode
|
|
429
429
|
}))
|
|
430
430
|
);
|
|
431
|
-
|
|
431
|
+
if (!options.onProgress) {
|
|
432
|
+
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
433
|
+
}
|
|
432
434
|
const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
|
|
433
435
|
if (pythonFiles.length > 0) {
|
|
434
436
|
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
@@ -446,7 +448,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
446
448
|
linesOfCode: p.endLine - p.startLine + 1
|
|
447
449
|
}));
|
|
448
450
|
allBlocks.push(...pythonBlocks);
|
|
449
|
-
|
|
451
|
+
if (!options.onProgress) {
|
|
452
|
+
console.log(`Added ${pythonBlocks.length} Python patterns`);
|
|
453
|
+
}
|
|
450
454
|
}
|
|
451
455
|
if (!approx && allBlocks.length > 500) {
|
|
452
456
|
console.log(
|
|
@@ -484,7 +488,11 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
484
488
|
"undefined",
|
|
485
489
|
"this"
|
|
486
490
|
]);
|
|
487
|
-
const tokenize = (norm) =>
|
|
491
|
+
const tokenize = (norm) => {
|
|
492
|
+
const punctuation = "(){}[];.,";
|
|
493
|
+
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
494
|
+
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
495
|
+
};
|
|
488
496
|
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
489
497
|
const invertedIndex = /* @__PURE__ */ new Map();
|
|
490
498
|
if (approx) {
|
|
@@ -518,20 +526,24 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
518
526
|
break;
|
|
519
527
|
}
|
|
520
528
|
if (i % batchSize === 0 && i > 0) {
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
if (totalComparisons !== void 0) {
|
|
524
|
-
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
525
|
-
const remaining = totalComparisons - comparisonsProcessed;
|
|
526
|
-
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
527
|
-
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
528
|
-
console.log(
|
|
529
|
-
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
530
|
-
);
|
|
529
|
+
if (options.onProgress) {
|
|
530
|
+
options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
|
|
531
531
|
} else {
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
)
|
|
532
|
+
const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
533
|
+
const duplicatesFound = duplicates.length;
|
|
534
|
+
if (totalComparisons !== void 0) {
|
|
535
|
+
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
536
|
+
const remaining = totalComparisons - comparisonsProcessed;
|
|
537
|
+
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
538
|
+
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
539
|
+
console.log(
|
|
540
|
+
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
541
|
+
);
|
|
542
|
+
} else {
|
|
543
|
+
console.log(
|
|
544
|
+
` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
|
|
545
|
+
);
|
|
546
|
+
}
|
|
535
547
|
}
|
|
536
548
|
await new Promise((resolve) => setImmediate(resolve));
|
|
537
549
|
}
|
|
@@ -1000,7 +1012,7 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
1000
1012
|
);
|
|
1001
1013
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
1002
1014
|
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
1003
|
-
|
|
1015
|
+
const defaults = {
|
|
1004
1016
|
rootDir: directory,
|
|
1005
1017
|
minSimilarity,
|
|
1006
1018
|
minLines,
|
|
@@ -1070,7 +1082,8 @@ async function analyzePatterns(options) {
|
|
|
1070
1082
|
approx,
|
|
1071
1083
|
minSharedTokens,
|
|
1072
1084
|
maxCandidatesPerBlock,
|
|
1073
|
-
streamResults
|
|
1085
|
+
streamResults,
|
|
1086
|
+
onProgress: options.onProgress
|
|
1074
1087
|
});
|
|
1075
1088
|
for (const file of files) {
|
|
1076
1089
|
const fileDuplicates = duplicates.filter(
|
package/dist/cli.mjs
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -57,6 +57,7 @@ interface DetectionOptions {
|
|
|
57
57
|
maxCandidatesPerBlock?: number;
|
|
58
58
|
maxComparisons?: number;
|
|
59
59
|
streamResults?: boolean;
|
|
60
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
60
61
|
}
|
|
61
62
|
/**
|
|
62
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
130
131
|
createClusters?: boolean;
|
|
131
132
|
minClusterTokenCost?: number;
|
|
132
133
|
minClusterFiles?: number;
|
|
134
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
133
135
|
}
|
|
134
136
|
interface PatternSummary {
|
|
135
137
|
totalPatterns: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -57,6 +57,7 @@ interface DetectionOptions {
|
|
|
57
57
|
maxCandidatesPerBlock?: number;
|
|
58
58
|
maxComparisons?: number;
|
|
59
59
|
streamResults?: boolean;
|
|
60
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
60
61
|
}
|
|
61
62
|
/**
|
|
62
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
130
131
|
createClusters?: boolean;
|
|
131
132
|
minClusterTokenCost?: number;
|
|
132
133
|
minClusterFiles?: number;
|
|
134
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
133
135
|
}
|
|
134
136
|
interface PatternSummary {
|
|
135
137
|
totalPatterns: number;
|
package/dist/index.js
CHANGED
|
@@ -446,7 +446,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
446
446
|
linesOfCode: block.linesOfCode
|
|
447
447
|
}))
|
|
448
448
|
);
|
|
449
|
-
|
|
449
|
+
if (!options.onProgress) {
|
|
450
|
+
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
451
|
+
}
|
|
450
452
|
const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
|
|
451
453
|
if (pythonFiles.length > 0) {
|
|
452
454
|
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
@@ -464,7 +466,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
464
466
|
linesOfCode: p.endLine - p.startLine + 1
|
|
465
467
|
}));
|
|
466
468
|
allBlocks.push(...pythonBlocks);
|
|
467
|
-
|
|
469
|
+
if (!options.onProgress) {
|
|
470
|
+
console.log(`Added ${pythonBlocks.length} Python patterns`);
|
|
471
|
+
}
|
|
468
472
|
}
|
|
469
473
|
if (!approx && allBlocks.length > 500) {
|
|
470
474
|
console.log(
|
|
@@ -502,7 +506,11 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
502
506
|
"undefined",
|
|
503
507
|
"this"
|
|
504
508
|
]);
|
|
505
|
-
const tokenize = (norm) =>
|
|
509
|
+
const tokenize = (norm) => {
|
|
510
|
+
const punctuation = "(){}[];.,";
|
|
511
|
+
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
512
|
+
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
513
|
+
};
|
|
506
514
|
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
507
515
|
const invertedIndex = /* @__PURE__ */ new Map();
|
|
508
516
|
if (approx) {
|
|
@@ -536,20 +544,24 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
536
544
|
break;
|
|
537
545
|
}
|
|
538
546
|
if (i % batchSize === 0 && i > 0) {
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
if (totalComparisons !== void 0) {
|
|
542
|
-
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
543
|
-
const remaining = totalComparisons - comparisonsProcessed;
|
|
544
|
-
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
545
|
-
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
546
|
-
console.log(
|
|
547
|
-
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
548
|
-
);
|
|
547
|
+
if (options.onProgress) {
|
|
548
|
+
options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
|
|
549
549
|
} else {
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
)
|
|
550
|
+
const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
551
|
+
const duplicatesFound = duplicates.length;
|
|
552
|
+
if (totalComparisons !== void 0) {
|
|
553
|
+
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
554
|
+
const remaining = totalComparisons - comparisonsProcessed;
|
|
555
|
+
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
556
|
+
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
557
|
+
console.log(
|
|
558
|
+
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
559
|
+
);
|
|
560
|
+
} else {
|
|
561
|
+
console.log(
|
|
562
|
+
` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
|
|
563
|
+
);
|
|
564
|
+
}
|
|
553
565
|
}
|
|
554
566
|
await new Promise((resolve) => setImmediate(resolve));
|
|
555
567
|
}
|
|
@@ -1119,7 +1131,7 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
1119
1131
|
);
|
|
1120
1132
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
1121
1133
|
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
1122
|
-
|
|
1134
|
+
const defaults = {
|
|
1123
1135
|
rootDir: directory,
|
|
1124
1136
|
minSimilarity,
|
|
1125
1137
|
minLines,
|
|
@@ -1189,7 +1201,8 @@ async function analyzePatterns(options) {
|
|
|
1189
1201
|
approx,
|
|
1190
1202
|
minSharedTokens,
|
|
1191
1203
|
maxCandidatesPerBlock,
|
|
1192
|
-
streamResults
|
|
1204
|
+
streamResults,
|
|
1205
|
+
onProgress: options.onProgress
|
|
1193
1206
|
});
|
|
1194
1207
|
for (const file of files) {
|
|
1195
1208
|
const fileDuplicates = duplicates.filter(
|
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/pattern-detect",
|
|
3
|
-
"version": "0.11.32",
|
|
3
|
+
"version": "0.11.36",
|
|
4
4
|
"description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"commander": "^14.0.0",
|
|
47
47
|
"chalk": "^5.3.0",
|
|
48
|
-
"@aiready/core": "0.9.
|
|
48
|
+
"@aiready/core": "0.9.37"
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"tsup": "^8.3.5",
|