@aiready/pattern-detect 0.11.32 → 0.11.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -428,7 +428,9 @@ async function detectDuplicatePatterns(files, options) {
428
428
  linesOfCode: block.linesOfCode
429
429
  }))
430
430
  );
431
- console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
431
+ if (!options.onProgress) {
432
+ console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
433
+ }
432
434
  const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
433
435
  if (pythonFiles.length > 0) {
434
436
  const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
@@ -446,7 +448,9 @@ async function detectDuplicatePatterns(files, options) {
446
448
  linesOfCode: p.endLine - p.startLine + 1
447
449
  }));
448
450
  allBlocks.push(...pythonBlocks);
449
- console.log(`Added ${pythonBlocks.length} Python patterns`);
451
+ if (!options.onProgress) {
452
+ console.log(`Added ${pythonBlocks.length} Python patterns`);
453
+ }
450
454
  }
451
455
  if (!approx && allBlocks.length > 500) {
452
456
  console.log(
@@ -484,7 +488,11 @@ async function detectDuplicatePatterns(files, options) {
484
488
  "undefined",
485
489
  "this"
486
490
  ]);
487
- const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
491
+ const tokenize = (norm) => {
492
+ const punctuation = "(){}[];.,";
493
+ const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
494
+ return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
495
+ };
488
496
  const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
489
497
  const invertedIndex = /* @__PURE__ */ new Map();
490
498
  if (approx) {
@@ -518,20 +526,24 @@ async function detectDuplicatePatterns(files, options) {
518
526
  break;
519
527
  }
520
528
  if (i % batchSize === 0 && i > 0) {
521
- const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
522
- const duplicatesFound = duplicates.length;
523
- if (totalComparisons !== void 0) {
524
- const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
525
- const remaining = totalComparisons - comparisonsProcessed;
526
- const rate = comparisonsProcessed / parseFloat(elapsed);
527
- const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
528
- console.log(
529
- ` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
530
- );
529
+ if (options.onProgress) {
530
+ options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
531
531
  } else {
532
- console.log(
533
- ` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
534
- );
532
+ const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
533
+ const duplicatesFound = duplicates.length;
534
+ if (totalComparisons !== void 0) {
535
+ const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
536
+ const remaining = totalComparisons - comparisonsProcessed;
537
+ const rate = comparisonsProcessed / parseFloat(elapsed);
538
+ const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
539
+ console.log(
540
+ ` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
541
+ );
542
+ } else {
543
+ console.log(
544
+ ` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
545
+ );
546
+ }
535
547
  }
536
548
  await new Promise((resolve) => setImmediate(resolve));
537
549
  }
@@ -1000,7 +1012,7 @@ async function getSmartDefaults(directory, userOptions) {
1000
1012
  );
1001
1013
  const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
1002
1014
  const severity = estimatedBlocks > 5e3 ? "high" : "all";
1003
- let defaults = {
1015
+ const defaults = {
1004
1016
  rootDir: directory,
1005
1017
  minSimilarity,
1006
1018
  minLines,
@@ -1070,7 +1082,8 @@ async function analyzePatterns(options) {
1070
1082
  approx,
1071
1083
  minSharedTokens,
1072
1084
  maxCandidatesPerBlock,
1073
- streamResults
1085
+ streamResults,
1086
+ onProgress: options.onProgress
1074
1087
  });
1075
1088
  for (const file of files) {
1076
1089
  const fileDuplicates = duplicates.filter(
package/dist/cli.mjs CHANGED
@@ -3,7 +3,7 @@ import {
3
3
  analyzePatterns,
4
4
  filterBySeverity,
5
5
  generateSummary
6
- } from "./chunk-SLDK5PQK.mjs";
6
+ } from "./chunk-YSDOUNJJ.mjs";
7
7
 
8
8
  // src/cli.ts
9
9
  import { Command } from "commander";
package/dist/index.d.mts CHANGED
@@ -57,6 +57,7 @@ interface DetectionOptions {
57
57
  maxCandidatesPerBlock?: number;
58
58
  maxComparisons?: number;
59
59
  streamResults?: boolean;
60
+ onProgress?: (processed: number, total: number, message: string) => void;
60
61
  }
61
62
  /**
62
63
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
130
131
  createClusters?: boolean;
131
132
  minClusterTokenCost?: number;
132
133
  minClusterFiles?: number;
134
+ onProgress?: (processed: number, total: number, message: string) => void;
133
135
  }
134
136
  interface PatternSummary {
135
137
  totalPatterns: number;
package/dist/index.d.ts CHANGED
@@ -57,6 +57,7 @@ interface DetectionOptions {
57
57
  maxCandidatesPerBlock?: number;
58
58
  maxComparisons?: number;
59
59
  streamResults?: boolean;
60
+ onProgress?: (processed: number, total: number, message: string) => void;
60
61
  }
61
62
  /**
62
63
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
130
131
  createClusters?: boolean;
131
132
  minClusterTokenCost?: number;
132
133
  minClusterFiles?: number;
134
+ onProgress?: (processed: number, total: number, message: string) => void;
133
135
  }
134
136
  interface PatternSummary {
135
137
  totalPatterns: number;
package/dist/index.js CHANGED
@@ -446,7 +446,9 @@ async function detectDuplicatePatterns(files, options) {
446
446
  linesOfCode: block.linesOfCode
447
447
  }))
448
448
  );
449
- console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
449
+ if (!options.onProgress) {
450
+ console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
451
+ }
450
452
  const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
451
453
  if (pythonFiles.length > 0) {
452
454
  const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
@@ -464,7 +466,9 @@ async function detectDuplicatePatterns(files, options) {
464
466
  linesOfCode: p.endLine - p.startLine + 1
465
467
  }));
466
468
  allBlocks.push(...pythonBlocks);
467
- console.log(`Added ${pythonBlocks.length} Python patterns`);
469
+ if (!options.onProgress) {
470
+ console.log(`Added ${pythonBlocks.length} Python patterns`);
471
+ }
468
472
  }
469
473
  if (!approx && allBlocks.length > 500) {
470
474
  console.log(
@@ -502,7 +506,11 @@ async function detectDuplicatePatterns(files, options) {
502
506
  "undefined",
503
507
  "this"
504
508
  ]);
505
- const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
509
+ const tokenize = (norm) => {
510
+ const punctuation = "(){}[];.,";
511
+ const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
512
+ return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
513
+ };
506
514
  const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
507
515
  const invertedIndex = /* @__PURE__ */ new Map();
508
516
  if (approx) {
@@ -536,20 +544,24 @@ async function detectDuplicatePatterns(files, options) {
536
544
  break;
537
545
  }
538
546
  if (i % batchSize === 0 && i > 0) {
539
- const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
540
- const duplicatesFound = duplicates.length;
541
- if (totalComparisons !== void 0) {
542
- const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
543
- const remaining = totalComparisons - comparisonsProcessed;
544
- const rate = comparisonsProcessed / parseFloat(elapsed);
545
- const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
546
- console.log(
547
- ` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
548
- );
547
+ if (options.onProgress) {
548
+ options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
549
549
  } else {
550
- console.log(
551
- ` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
552
- );
550
+ const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
551
+ const duplicatesFound = duplicates.length;
552
+ if (totalComparisons !== void 0) {
553
+ const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
554
+ const remaining = totalComparisons - comparisonsProcessed;
555
+ const rate = comparisonsProcessed / parseFloat(elapsed);
556
+ const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
557
+ console.log(
558
+ ` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
559
+ );
560
+ } else {
561
+ console.log(
562
+ ` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
563
+ );
564
+ }
553
565
  }
554
566
  await new Promise((resolve) => setImmediate(resolve));
555
567
  }
@@ -1119,7 +1131,7 @@ async function getSmartDefaults(directory, userOptions) {
1119
1131
  );
1120
1132
  const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
1121
1133
  const severity = estimatedBlocks > 5e3 ? "high" : "all";
1122
- let defaults = {
1134
+ const defaults = {
1123
1135
  rootDir: directory,
1124
1136
  minSimilarity,
1125
1137
  minLines,
@@ -1189,7 +1201,8 @@ async function analyzePatterns(options) {
1189
1201
  approx,
1190
1202
  minSharedTokens,
1191
1203
  maxCandidatesPerBlock,
1192
- streamResults
1204
+ streamResults,
1205
+ onProgress: options.onProgress
1193
1206
  });
1194
1207
  for (const file of files) {
1195
1208
  const fileDuplicates = duplicates.filter(
package/dist/index.mjs CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  generateSummary,
8
8
  getSeverityLabel,
9
9
  getSmartDefaults
10
- } from "./chunk-SLDK5PQK.mjs";
10
+ } from "./chunk-YSDOUNJJ.mjs";
11
11
  export {
12
12
  analyzePatterns,
13
13
  calculatePatternScore,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiready/pattern-detect",
3
- "version": "0.11.32",
3
+ "version": "0.11.36",
4
4
  "description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -45,7 +45,7 @@
45
45
  "dependencies": {
46
46
  "commander": "^14.0.0",
47
47
  "chalk": "^5.3.0",
48
- "@aiready/core": "0.9.33"
48
+ "@aiready/core": "0.9.37"
49
49
  },
50
50
  "devDependencies": {
51
51
  "tsup": "^8.3.5",