@aiready/pattern-detect 0.7.9 → 0.7.12

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -61,6 +61,10 @@ interface PatternSummary {
61
61
  tokenCost: number;
62
62
  }>;
63
63
  }
64
+ /**
65
+ * Determine smart defaults based on repository size estimation
66
+ */
67
+ declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
64
68
  declare function analyzePatterns(options: PatternDetectOptions): Promise<{
65
69
  results: AnalysisResult[];
66
70
  duplicates: DuplicatePattern[];
@@ -71,4 +75,4 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
71
75
  */
72
76
  declare function generateSummary(results: AnalysisResult[]): PatternSummary;
73
77
 
74
- export { type DuplicatePattern, type PatternDetectOptions, type PatternSummary, type PatternType, analyzePatterns, detectDuplicatePatterns, generateSummary };
78
+ export { type DuplicatePattern, type PatternDetectOptions, type PatternSummary, type PatternType, analyzePatterns, detectDuplicatePatterns, generateSummary, getSmartDefaults };
package/dist/index.js CHANGED
@@ -32,7 +32,8 @@ var index_exports = {};
32
32
  __export(index_exports, {
33
33
  analyzePatterns: () => analyzePatterns,
34
34
  detectDuplicatePatterns: () => detectDuplicatePatterns,
35
- generateSummary: () => generateSummary
35
+ generateSummary: () => generateSummary,
36
+ getSmartDefaults: () => getSmartDefaults
36
37
  });
37
38
  module.exports = __toCommonJS(index_exports);
38
39
  var import_core2 = require("@aiready/core");
@@ -220,7 +221,13 @@ async function detectDuplicatePatterns(files, options) {
220
221
  let candidates = null;
221
222
  if (approx) {
222
223
  const counts = /* @__PURE__ */ new Map();
223
- for (const tok of blockTokens[i]) {
224
+ const block1Tokens = new Set(blockTokens[i]);
225
+ const block1Size = block1Tokens.size;
226
+ const rareTokens = blockTokens[i].filter((tok) => {
227
+ const blocksWithToken = invertedIndex.get(tok)?.length || 0;
228
+ return blocksWithToken < allBlocks.length * 0.1;
229
+ });
230
+ for (const tok of rareTokens) {
224
231
  const ids = invertedIndex.get(tok);
225
232
  if (!ids) continue;
226
233
  for (const j of ids) {
@@ -229,7 +236,13 @@ async function detectDuplicatePatterns(files, options) {
229
236
  counts.set(j, (counts.get(j) || 0) + 1);
230
237
  }
231
238
  }
232
- candidates = Array.from(counts.entries()).filter(([, shared]) => shared >= minSharedTokens).sort((a, b) => b[1] - a[1]).slice(0, maxCandidatesPerBlock).map(([j, shared]) => ({ j, shared }));
239
+ candidates = Array.from(counts.entries()).filter(([j, shared]) => {
240
+ const block2Tokens = blockTokens[j];
241
+ const block2Size = block2Tokens.length;
242
+ const minSize = Math.min(block1Size, block2Size);
243
+ const sharedPercentage = shared / minSize;
244
+ return shared >= minSharedTokens && sharedPercentage >= 0.3;
245
+ }).sort((a, b) => b[1] - a[1]).slice(0, Math.min(maxCandidatesPerBlock, 5)).map(([j, shared]) => ({ j, shared }));
233
246
  }
234
247
  if (approx && candidates) {
235
248
  for (const { j } of candidates) {
@@ -322,12 +335,12 @@ async function getSmartDefaults(directory, userOptions) {
322
335
  if (userOptions.useSmartDefaults === false) {
323
336
  return {
324
337
  rootDir: directory,
325
- minSimilarity: 0.4,
326
- minLines: 5,
338
+ minSimilarity: 0.6,
339
+ minLines: 8,
327
340
  batchSize: 100,
328
341
  approx: true,
329
- minSharedTokens: 8,
330
- maxCandidatesPerBlock: 100,
342
+ minSharedTokens: 12,
343
+ maxCandidatesPerBlock: 5,
331
344
  streamResults: false,
332
345
  severity: "all",
333
346
  includeTests: false
@@ -348,12 +361,12 @@ async function getSmartDefaults(directory, userOptions) {
348
361
  const { scanFiles: scanFiles2 } = await import("@aiready/core");
349
362
  const files = await scanFiles2(scanOptions);
350
363
  const estimatedBlocks = files.length * 3;
351
- const maxCandidatesPerBlock = Math.max(10, Math.min(100, Math.floor(8e4 / estimatedBlocks)));
352
- const minSimilarity = Math.min(0.65, 0.4 + estimatedBlocks / 15e3 * 0.25);
353
- const minLines = Math.max(5, Math.min(10, 5 + Math.floor(estimatedBlocks / 3e3)));
354
- const minSharedTokens = Math.max(8, Math.min(15, 8 + Math.floor(estimatedBlocks / 4e3)));
355
- const batchSize = estimatedBlocks > 2e3 ? 300 : 150;
356
- const severity = estimatedBlocks > 8e3 ? "high" : "all";
364
+ const maxCandidatesPerBlock = Math.max(3, Math.min(10, Math.floor(3e4 / estimatedBlocks)));
365
+ const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
366
+ const minLines = Math.max(6, Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3)));
367
+ const minSharedTokens = Math.max(10, Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3)));
368
+ const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
369
+ const severity = estimatedBlocks > 5e3 ? "high" : "all";
357
370
  let defaults = {
358
371
  rootDir: directory,
359
372
  minSimilarity,
@@ -523,5 +536,6 @@ function generateSummary(results) {
523
536
  0 && (module.exports = {
524
537
  analyzePatterns,
525
538
  detectDuplicatePatterns,
526
- generateSummary
539
+ generateSummary,
540
+ getSmartDefaults
527
541
  });
package/dist/index.mjs CHANGED
@@ -1,10 +1,12 @@
1
1
  import {
2
2
  analyzePatterns,
3
3
  detectDuplicatePatterns,
4
- generateSummary
5
- } from "./chunk-S2KQFII2.mjs";
4
+ generateSummary,
5
+ getSmartDefaults
6
+ } from "./chunk-GSJFORRO.mjs";
6
7
  export {
7
8
  analyzePatterns,
8
9
  detectDuplicatePatterns,
9
- generateSummary
10
+ generateSummary,
11
+ getSmartDefaults
10
12
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiready/pattern-detect",
3
- "version": "0.7.9",
3
+ "version": "0.7.12",
4
4
  "description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -45,7 +45,7 @@
45
45
  "dependencies": {
46
46
  "commander": "^14.0.0",
47
47
  "chalk": "^5.3.0",
48
- "@aiready/core": "0.3.6"
48
+ "@aiready/core": "0.3.7"
49
49
  },
50
50
  "devDependencies": {
51
51
  "tsup": "^8.3.5",