@aiready/pattern-detect 0.16.6 → 0.16.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DR5W7S3Z.mjs +968 -0
- package/dist/chunk-PSVG2NLH.mjs +966 -0
- package/dist/cli.js +170 -90
- package/dist/cli.mjs +118 -85
- package/dist/index.d.mts +7 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +54 -5
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
package/dist/cli.mjs
CHANGED
|
@@ -3,10 +3,12 @@ import {
|
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
filterBySeverity,
|
|
5
5
|
generateSummary
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-DR5W7S3Z.mjs";
|
|
7
7
|
|
|
8
8
|
// src/cli.ts
|
|
9
9
|
import { Command } from "commander";
|
|
10
|
+
|
|
11
|
+
// src/cli-action.ts
|
|
10
12
|
import chalk from "chalk";
|
|
11
13
|
import { writeFileSync, mkdirSync, existsSync } from "fs";
|
|
12
14
|
import { dirname } from "path";
|
|
@@ -118,80 +120,61 @@ function generateHTMLReport(results, summary) {
|
|
|
118
120
|
</html>`;
|
|
119
121
|
}
|
|
120
122
|
|
|
121
|
-
// src/
|
|
122
|
-
var
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
)
|
|
151
|
-
"--exclude-test-fixtures",
|
|
152
|
-
"Exclude test fixture duplication (beforeAll/afterAll)"
|
|
153
|
-
).option("--exclude-templates", "Exclude template file duplication").option(
|
|
154
|
-
"--include-tests",
|
|
155
|
-
"Include test files in analysis (excluded by default)"
|
|
156
|
-
).option(
|
|
157
|
-
"--max-results <number>",
|
|
158
|
-
"Maximum number of results to show in console output. Default: 10"
|
|
159
|
-
).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
|
|
160
|
-
"--min-cluster-tokens <number>",
|
|
161
|
-
"Minimum token cost for cluster reporting. Default: 1000"
|
|
162
|
-
).option(
|
|
163
|
-
"--min-cluster-files <number>",
|
|
164
|
-
"Minimum files for cluster reporting. Default: 3"
|
|
165
|
-
).option(
|
|
166
|
-
"--show-raw-duplicates",
|
|
167
|
-
"Show raw duplicates instead of grouped view"
|
|
168
|
-
).option(
|
|
169
|
-
"-o, --output <format>",
|
|
170
|
-
"Output format: console, json, html",
|
|
171
|
-
"console"
|
|
172
|
-
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
123
|
+
// src/constants.ts
|
|
124
|
+
var DEFAULT_MIN_SIMILARITY = 0.4;
|
|
125
|
+
var DEFAULT_MIN_LINES = 5;
|
|
126
|
+
var DEFAULT_BATCH_SIZE = 100;
|
|
127
|
+
var DEFAULT_MIN_SHARED_TOKENS = 8;
|
|
128
|
+
var DEFAULT_MAX_CANDIDATES_PER_BLOCK = 100;
|
|
129
|
+
var DEFAULT_MAX_RESULTS = 10;
|
|
130
|
+
var DEFAULT_MIN_CLUSTER_TOKEN_COST = 1e3;
|
|
131
|
+
var DEFAULT_MIN_CLUSTER_FILES = 3;
|
|
132
|
+
var COMMAND_NAME = "aiready-patterns";
|
|
133
|
+
var COMMAND_VERSION = "0.1.0";
|
|
134
|
+
var DEFAULT_OUTPUT_FORMAT = "console";
|
|
135
|
+
var HELP_TEXT_AFTER = `
|
|
136
|
+
CONFIGURATION:
|
|
137
|
+
Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js
|
|
138
|
+
CLI options override config file settings
|
|
139
|
+
|
|
140
|
+
PARAMETER TUNING:
|
|
141
|
+
If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens
|
|
142
|
+
If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates
|
|
143
|
+
If you get too many false positives: increase --similarity or --min-lines
|
|
144
|
+
|
|
145
|
+
EXAMPLES:
|
|
146
|
+
aiready-patterns . # Basic analysis with smart defaults
|
|
147
|
+
aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection
|
|
148
|
+
aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough
|
|
149
|
+
aiready-patterns . --output json > report.json # JSON export`;
|
|
150
|
+
|
|
151
|
+
// src/cli-action.ts
|
|
152
|
+
async function patternActionHandler(directory, options) {
|
|
173
153
|
console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
174
154
|
const startTime = Date.now();
|
|
175
155
|
const config = await loadConfig(directory);
|
|
176
156
|
const defaults = {
|
|
177
|
-
minSimilarity:
|
|
178
|
-
minLines:
|
|
179
|
-
batchSize:
|
|
157
|
+
minSimilarity: DEFAULT_MIN_SIMILARITY,
|
|
158
|
+
minLines: DEFAULT_MIN_LINES,
|
|
159
|
+
batchSize: DEFAULT_BATCH_SIZE,
|
|
180
160
|
approx: true,
|
|
181
|
-
minSharedTokens:
|
|
182
|
-
maxCandidatesPerBlock:
|
|
161
|
+
minSharedTokens: DEFAULT_MIN_SHARED_TOKENS,
|
|
162
|
+
maxCandidatesPerBlock: DEFAULT_MAX_CANDIDATES_PER_BLOCK,
|
|
183
163
|
streamResults: true,
|
|
184
164
|
include: void 0,
|
|
185
165
|
exclude: void 0,
|
|
166
|
+
excludePatterns: void 0,
|
|
167
|
+
confidenceThreshold: 0,
|
|
168
|
+
ignoreWhitelist: void 0,
|
|
186
169
|
minSeverity: Severity.Minor,
|
|
187
170
|
excludeTestFixtures: false,
|
|
188
171
|
excludeTemplates: false,
|
|
189
172
|
includeTests: false,
|
|
190
|
-
maxResults:
|
|
173
|
+
maxResults: DEFAULT_MAX_RESULTS,
|
|
191
174
|
groupByFilePair: true,
|
|
192
175
|
createClusters: true,
|
|
193
|
-
minClusterTokenCost:
|
|
194
|
-
minClusterFiles:
|
|
176
|
+
minClusterTokenCost: DEFAULT_MIN_CLUSTER_TOKEN_COST,
|
|
177
|
+
minClusterFiles: DEFAULT_MIN_CLUSTER_FILES,
|
|
195
178
|
showRawDuplicates: false
|
|
196
179
|
};
|
|
197
180
|
const mergedConfig = mergeConfigWithDefaults(config, defaults);
|
|
@@ -207,15 +190,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
207
190
|
streamResults: options.streamResults !== false && mergedConfig.streamResults,
|
|
208
191
|
include: options.include?.split(",") || mergedConfig.include,
|
|
209
192
|
exclude: options.exclude?.split(",") || mergedConfig.exclude,
|
|
193
|
+
excludePatterns: options.excludePatterns?.split(",") || mergedConfig.excludePatterns,
|
|
194
|
+
confidenceThreshold: options.confidenceThreshold ? parseFloat(options.confidenceThreshold) : mergedConfig.confidenceThreshold,
|
|
195
|
+
ignoreWhitelist: options.ignoreWhitelist?.split(",") || mergedConfig.ignoreWhitelist,
|
|
210
196
|
minSeverity: options.minSeverity || mergedConfig.minSeverity,
|
|
211
197
|
excludeTestFixtures: options.excludeTestFixtures || mergedConfig.excludeTestFixtures,
|
|
212
198
|
excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
|
|
213
199
|
includeTests: options.includeTests || mergedConfig.includeTests,
|
|
214
200
|
maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
|
|
215
|
-
groupByFilePair: options.
|
|
216
|
-
createClusters: options.
|
|
217
|
-
minClusterTokenCost: options.
|
|
218
|
-
minClusterFiles: options.
|
|
201
|
+
groupByFilePair: options.groupByFilePair !== false && mergedConfig.groupByFilePair,
|
|
202
|
+
createClusters: options.createClusters !== false && mergedConfig.createClusters,
|
|
203
|
+
minClusterTokenCost: options.minClusterTokens ? parseInt(options.minClusterTokens) : mergedConfig.minClusterTokenCost,
|
|
204
|
+
minClusterFiles: options.minClusterFiles ? parseInt(options.minClusterFiles) : mergedConfig.minClusterFiles,
|
|
219
205
|
showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
|
|
220
206
|
};
|
|
221
207
|
if (finalOptions.includeTests && finalOptions.exclude) {
|
|
@@ -301,9 +287,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
301
287
|
console.log(chalk.cyan(divider));
|
|
302
288
|
console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
|
|
303
289
|
console.log(chalk.cyan(divider) + "\n");
|
|
304
|
-
console.log(
|
|
305
|
-
chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
|
|
306
|
-
);
|
|
290
|
+
console.log(chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`));
|
|
307
291
|
console.log(
|
|
308
292
|
chalk.yellow(
|
|
309
293
|
`\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`
|
|
@@ -314,9 +298,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
314
298
|
`\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
|
|
315
299
|
)
|
|
316
300
|
);
|
|
317
|
-
console.log(
|
|
318
|
-
chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
|
|
319
|
-
);
|
|
301
|
+
console.log(chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`));
|
|
320
302
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
321
303
|
if (sortedTypes.length > 0) {
|
|
322
304
|
console.log(chalk.cyan("\n" + divider));
|
|
@@ -382,9 +364,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
382
364
|
console.log(chalk.cyan(divider) + "\n");
|
|
383
365
|
clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
|
|
384
366
|
const severityBadge = getSeverityBadge2(cluster.severity);
|
|
385
|
-
console.log(
|
|
386
|
-
`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
|
|
387
|
-
);
|
|
367
|
+
console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`);
|
|
388
368
|
console.log(
|
|
389
369
|
` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
|
|
390
370
|
);
|
|
@@ -466,10 +446,8 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
466
446
|
chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
|
|
467
447
|
);
|
|
468
448
|
console.log(` ${chalk.dim(issue.message)}`);
|
|
469
|
-
console.log(
|
|
470
|
-
|
|
471
|
-
`
|
|
472
|
-
);
|
|
449
|
+
console.log(` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
|
|
450
|
+
`);
|
|
473
451
|
});
|
|
474
452
|
}
|
|
475
453
|
if (totalIssues === 0) {
|
|
@@ -479,9 +457,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
479
457
|
"\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
|
|
480
458
|
)
|
|
481
459
|
);
|
|
482
|
-
console.log(
|
|
483
|
-
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
484
|
-
);
|
|
460
|
+
console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
|
|
485
461
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
486
462
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
487
463
|
console.log(
|
|
@@ -493,9 +469,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
493
469
|
console.log(
|
|
494
470
|
chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
|
|
495
471
|
);
|
|
496
|
-
console.log(
|
|
497
|
-
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
498
|
-
);
|
|
472
|
+
console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
|
|
499
473
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
500
474
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
501
475
|
console.log(
|
|
@@ -522,5 +496,64 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
522
496
|
"\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
|
|
523
497
|
)
|
|
524
498
|
);
|
|
525
|
-
}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// src/cli.ts
|
|
502
|
+
var program = new Command();
|
|
503
|
+
program.name(COMMAND_NAME).description("Detect duplicate patterns in your codebase").version(COMMAND_VERSION).addHelpText("after", HELP_TEXT_AFTER).argument("<directory>", "Directory to analyze").option(
|
|
504
|
+
"-s, --similarity <number>",
|
|
505
|
+
`Minimum similarity score (0-1). Default: ${DEFAULT_MIN_SIMILARITY}`
|
|
506
|
+
).option(
|
|
507
|
+
"-l, --min-lines <number>",
|
|
508
|
+
`Minimum lines to consider. Default: ${DEFAULT_MIN_LINES}`
|
|
509
|
+
).option(
|
|
510
|
+
"--batch-size <number>",
|
|
511
|
+
`Batch size for comparisons. Default: ${DEFAULT_BATCH_SIZE}`
|
|
512
|
+
).option(
|
|
513
|
+
"--no-approx",
|
|
514
|
+
"Disable approximate candidate selection. Slower but more thorough on small repos"
|
|
515
|
+
).option(
|
|
516
|
+
"--min-shared-tokens <number>",
|
|
517
|
+
`Minimum shared tokens to consider a candidate. Default: ${DEFAULT_MIN_SHARED_TOKENS}`
|
|
518
|
+
).option(
|
|
519
|
+
"--max-candidates <number>",
|
|
520
|
+
`Maximum candidates per block. Default: ${DEFAULT_MAX_CANDIDATES_PER_BLOCK}`
|
|
521
|
+
).option(
|
|
522
|
+
"--no-stream-results",
|
|
523
|
+
"Disable incremental output (default: enabled)"
|
|
524
|
+
).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
525
|
+
"--exclude-patterns <regexes>",
|
|
526
|
+
"Regex patterns to exclude specific code content (comma-separated)"
|
|
527
|
+
).option(
|
|
528
|
+
"--confidence-threshold <number>",
|
|
529
|
+
"Minimum confidence score (0-1). Default: 0"
|
|
530
|
+
).option(
|
|
531
|
+
"--ignore-whitelist <patterns>",
|
|
532
|
+
"List of file pairs or patterns to ignore (comma-separated)"
|
|
533
|
+
).option(
|
|
534
|
+
"--min-severity <level>",
|
|
535
|
+
"Minimum severity to show: critical|major|minor|info. Default: minor"
|
|
536
|
+
).option(
|
|
537
|
+
"--exclude-test-fixtures",
|
|
538
|
+
"Exclude test fixture duplication (beforeAll/afterAll)"
|
|
539
|
+
).option("--exclude-templates", "Exclude template file duplication").option(
|
|
540
|
+
"--include-tests",
|
|
541
|
+
"Include test files in analysis (excluded by default)"
|
|
542
|
+
).option(
|
|
543
|
+
"--max-results <number>",
|
|
544
|
+
`Maximum number of results to show in console output. Default: ${DEFAULT_MAX_RESULTS}`
|
|
545
|
+
).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
|
|
546
|
+
"--min-cluster-tokens <number>",
|
|
547
|
+
`Minimum token cost for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_TOKEN_COST}`
|
|
548
|
+
).option(
|
|
549
|
+
"--min-cluster-files <number>",
|
|
550
|
+
`Minimum files for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_FILES}`
|
|
551
|
+
).option(
|
|
552
|
+
"--show-raw-duplicates",
|
|
553
|
+
"Show raw duplicates instead of grouped view"
|
|
554
|
+
).option(
|
|
555
|
+
"-o, --output <format>",
|
|
556
|
+
"Output format: console, json, html",
|
|
557
|
+
DEFAULT_OUTPUT_FORMAT
|
|
558
|
+
).option("--output-file <path>", "Output file path (for json/html)").action(patternActionHandler);
|
|
526
559
|
program.parse();
|
package/dist/index.d.mts
CHANGED
|
@@ -17,6 +17,7 @@ interface DuplicatePattern {
|
|
|
17
17
|
code1: string;
|
|
18
18
|
code2: string;
|
|
19
19
|
similarity: number;
|
|
20
|
+
confidence: number;
|
|
20
21
|
patternType: PatternType;
|
|
21
22
|
tokenCost: number;
|
|
22
23
|
severity: Severity;
|
|
@@ -32,6 +33,9 @@ interface DetectionOptions {
|
|
|
32
33
|
minSharedTokens: number;
|
|
33
34
|
maxCandidatesPerBlock: number;
|
|
34
35
|
streamResults: boolean;
|
|
36
|
+
excludePatterns?: string[];
|
|
37
|
+
confidenceThreshold?: number;
|
|
38
|
+
ignoreWhitelist?: string[];
|
|
35
39
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
36
40
|
}
|
|
37
41
|
|
|
@@ -108,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
108
112
|
createClusters?: boolean;
|
|
109
113
|
minClusterTokenCost?: number;
|
|
110
114
|
minClusterFiles?: number;
|
|
115
|
+
excludePatterns?: string[];
|
|
116
|
+
confidenceThreshold?: number;
|
|
117
|
+
ignoreWhitelist?: string[];
|
|
111
118
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
112
119
|
}
|
|
113
120
|
interface PatternSummary {
|
package/dist/index.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ interface DuplicatePattern {
|
|
|
17
17
|
code1: string;
|
|
18
18
|
code2: string;
|
|
19
19
|
similarity: number;
|
|
20
|
+
confidence: number;
|
|
20
21
|
patternType: PatternType;
|
|
21
22
|
tokenCost: number;
|
|
22
23
|
severity: Severity;
|
|
@@ -32,6 +33,9 @@ interface DetectionOptions {
|
|
|
32
33
|
minSharedTokens: number;
|
|
33
34
|
maxCandidatesPerBlock: number;
|
|
34
35
|
streamResults: boolean;
|
|
36
|
+
excludePatterns?: string[];
|
|
37
|
+
confidenceThreshold?: number;
|
|
38
|
+
ignoreWhitelist?: string[];
|
|
35
39
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
36
40
|
}
|
|
37
41
|
|
|
@@ -108,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
108
112
|
createClusters?: boolean;
|
|
109
113
|
minClusterTokenCost?: number;
|
|
110
114
|
minClusterFiles?: number;
|
|
115
|
+
excludePatterns?: string[];
|
|
116
|
+
confidenceThreshold?: number;
|
|
117
|
+
ignoreWhitelist?: string[];
|
|
111
118
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
112
119
|
}
|
|
113
120
|
interface PatternSummary {
|
package/dist/index.js
CHANGED
|
@@ -350,14 +350,33 @@ function calculateSimilarity(a, b) {
|
|
|
350
350
|
const union = /* @__PURE__ */ new Set([...setA, ...setB]);
|
|
351
351
|
return intersection.size / union.size;
|
|
352
352
|
}
|
|
353
|
+
function calculateConfidence(similarity, tokens, lines) {
|
|
354
|
+
let confidence = similarity;
|
|
355
|
+
if (lines > 20) confidence += 0.05;
|
|
356
|
+
if (tokens > 200) confidence += 0.05;
|
|
357
|
+
if (lines < 5) confidence -= 0.1;
|
|
358
|
+
return Math.max(0, Math.min(1, confidence));
|
|
359
|
+
}
|
|
353
360
|
async function detectDuplicatePatterns(fileContents, options) {
|
|
354
|
-
const {
|
|
361
|
+
const {
|
|
362
|
+
minSimilarity,
|
|
363
|
+
minLines,
|
|
364
|
+
streamResults,
|
|
365
|
+
onProgress,
|
|
366
|
+
excludePatterns = [],
|
|
367
|
+
confidenceThreshold = 0,
|
|
368
|
+
ignoreWhitelist = []
|
|
369
|
+
} = options;
|
|
355
370
|
const allBlocks = [];
|
|
371
|
+
const excludeRegexes = excludePatterns.map((p) => new RegExp(p, "i"));
|
|
356
372
|
for (const { file, content } of fileContents) {
|
|
357
373
|
const blocks = extractBlocks(file, content);
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
374
|
+
for (const b of blocks) {
|
|
375
|
+
if (b.endLine - b.startLine + 1 < minLines) continue;
|
|
376
|
+
const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
|
|
377
|
+
if (isExcluded) continue;
|
|
378
|
+
allBlocks.push(b);
|
|
379
|
+
}
|
|
361
380
|
}
|
|
362
381
|
const duplicates = [];
|
|
363
382
|
const totalBlocks = allBlocks.length;
|
|
@@ -388,10 +407,20 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
388
407
|
comparisons++;
|
|
389
408
|
const b2 = allBlocks[j];
|
|
390
409
|
if (b1.file === b2.file) continue;
|
|
410
|
+
const isWhitelisted = ignoreWhitelist.some((pattern) => {
|
|
411
|
+
return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
|
|
412
|
+
});
|
|
413
|
+
if (isWhitelisted) continue;
|
|
391
414
|
const isPython2 = b2.file.toLowerCase().endsWith(".py");
|
|
392
415
|
const norm2 = normalizeCode(b2.code, isPython2);
|
|
393
416
|
const sim = calculateSimilarity(norm1, norm2);
|
|
394
417
|
if (sim >= minSimilarity) {
|
|
418
|
+
const confidence = calculateConfidence(
|
|
419
|
+
sim,
|
|
420
|
+
b1.tokens,
|
|
421
|
+
b1.endLine - b1.startLine + 1
|
|
422
|
+
);
|
|
423
|
+
if (confidence < confidenceThreshold) continue;
|
|
395
424
|
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
396
425
|
b1.file,
|
|
397
426
|
b2.file,
|
|
@@ -409,6 +438,7 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
409
438
|
code1: b1.code,
|
|
410
439
|
code2: b2.code,
|
|
411
440
|
similarity: sim,
|
|
441
|
+
confidence,
|
|
412
442
|
patternType: b1.patternType,
|
|
413
443
|
tokenCost: b1.tokens + b2.tokens,
|
|
414
444
|
severity,
|
|
@@ -419,7 +449,7 @@ async function detectDuplicatePatterns(fileContents, options) {
|
|
|
419
449
|
duplicates.push(dup);
|
|
420
450
|
if (streamResults)
|
|
421
451
|
console.log(
|
|
422
|
-
`[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%)`
|
|
452
|
+
`[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
|
|
423
453
|
);
|
|
424
454
|
}
|
|
425
455
|
}
|
|
@@ -633,6 +663,17 @@ function logConfiguration(config, estimatedBlocks) {
|
|
|
633
663
|
console.log(` Min shared tokens: ${config.minSharedTokens}`);
|
|
634
664
|
console.log(` Severity filter: ${config.severity}`);
|
|
635
665
|
console.log(` Include tests: ${config.includeTests}`);
|
|
666
|
+
if (config.excludePatterns && config.excludePatterns.length > 0) {
|
|
667
|
+
console.log(` Exclude patterns: ${config.excludePatterns.length} active`);
|
|
668
|
+
}
|
|
669
|
+
if (config.confidenceThreshold && config.confidenceThreshold > 0) {
|
|
670
|
+
console.log(` Confidence threshold: ${config.confidenceThreshold}`);
|
|
671
|
+
}
|
|
672
|
+
if (config.ignoreWhitelist && config.ignoreWhitelist.length > 0) {
|
|
673
|
+
console.log(
|
|
674
|
+
` Ignore whitelist: ${config.ignoreWhitelist.length} entries`
|
|
675
|
+
);
|
|
676
|
+
}
|
|
636
677
|
console.log("");
|
|
637
678
|
}
|
|
638
679
|
async function analyzePatterns(options) {
|
|
@@ -651,6 +692,9 @@ async function analyzePatterns(options) {
|
|
|
651
692
|
createClusters = true,
|
|
652
693
|
minClusterTokenCost = 1e3,
|
|
653
694
|
minClusterFiles = 3,
|
|
695
|
+
excludePatterns = [],
|
|
696
|
+
confidenceThreshold = 0,
|
|
697
|
+
ignoreWhitelist = [],
|
|
654
698
|
...scanOptions
|
|
655
699
|
} = finalOptions;
|
|
656
700
|
const files = await (0, import_core4.scanFiles)(scanOptions);
|
|
@@ -677,6 +721,9 @@ async function analyzePatterns(options) {
|
|
|
677
721
|
minSharedTokens,
|
|
678
722
|
maxCandidatesPerBlock,
|
|
679
723
|
streamResults,
|
|
724
|
+
excludePatterns,
|
|
725
|
+
confidenceThreshold,
|
|
726
|
+
ignoreWhitelist,
|
|
680
727
|
onProgress: options.onProgress
|
|
681
728
|
});
|
|
682
729
|
for (const file of files) {
|
|
@@ -778,6 +825,8 @@ function generateSummary(results) {
|
|
|
778
825
|
}
|
|
779
826
|
],
|
|
780
827
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
828
|
+
confidence: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
829
|
+
// Fallback for summary
|
|
781
830
|
patternType: typeMatch?.[1] || "unknown",
|
|
782
831
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
|
783
832
|
};
|
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/pattern-detect",
|
|
3
|
-
"version": "0.16.
|
|
3
|
+
"version": "0.16.7",
|
|
4
4
|
"description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"commander": "^14.0.0",
|
|
47
47
|
"chalk": "^5.3.0",
|
|
48
|
-
"@aiready/core": "0.23.
|
|
48
|
+
"@aiready/core": "0.23.8"
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"tsup": "^8.3.5",
|