@aiready/pattern-detect 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +249 -0
- package/dist/chunk-K5O2HVB5.mjs +114 -0
- package/dist/chunk-RLWJXASG.mjs +227 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +447 -0
- package/dist/cli.mjs +204 -0
- package/dist/index.d.mts +50 -0
- package/dist/index.d.ts +50 -0
- package/dist/index.js +253 -0
- package/dist/index.mjs +10 -0
- package/package.json +72 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 AIReady Team

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,249 @@
# @aiready/pattern-detect

> **Semantic duplicate pattern detection for AI-generated code**

When AI tools generate code without awareness of existing patterns in your codebase, you end up with semantically similar but syntactically different implementations. This tool finds those patterns and quantifies their cost.

## 🎯 Why This Tool?

### The AI Code Problem

AI coding assistants (GitHub Copilot, ChatGPT, Claude) generate functionally similar code in different ways because:
- No awareness of existing patterns in your codebase
- Different AI models have different coding styles
- Team members use AI tools with varying contexts
- AI can't see your full codebase (context window limits)

### What Makes Us Different?

| Feature | jscpd | @aiready/pattern-detect |
|---------|-------|------------------------|
| Detection Method | Byte-level exact matching | Semantic similarity |
| Pattern Types | Generic blocks | Categorized (API, validators, utils, etc.) |
| Token Cost | ❌ No | ✅ Yes - shows AI context waste |
| Refactoring Suggestions | ❌ Generic | ✅ Specific to pattern type |
| Output Formats | Text/JSON | Console/JSON/HTML with rich formatting |
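To make "semantic similarity" concrete: the bundled detector (`dist/chunk-RLWJXASG.mjs`) strips comments, masks string and number literals, collapses whitespace, and then blends a character-level and a token-level score. The sketch below mirrors that normalization; the token-overlap score is only an assumed stand-in for the `similarityScore` helper imported from `@aiready/core`, whose implementation is not shown in this package.

```typescript
// Sketch only: normalization mirrors the published normalizeCode, but the
// overlap score is an assumed stand-in for @aiready/core's similarityScore.
function normalize(code: string): string {
  return code
    .replace(/\/\/.*$/gm, "")          // drop line comments
    .replace(/\/\*[\s\S]*?\*\//g, "")  // drop block comments
    .replace(/"[^"]*"/g, '"STR"')      // mask string literals
    .replace(/'[^']*'/g, "'STR'")
    .replace(/\b\d+\b/g, "NUM")        // mask numeric literals
    .replace(/\s+/g, " ")
    .trim();
}

function tokenOverlap(a: string, b: string): number {
  const tokens = (s: string) =>
    new Set(normalize(s).split(/[\s(){}[\];,]+/).filter(Boolean));
  const ta = tokens(a);
  const tb = tokens(b);
  const shared = [...ta].filter((t) => tb.has(t)).length;
  return ta.size && tb.size ? shared / Math.max(ta.size, tb.size) : 0;
}
```

Because formatting and literal differences are masked before comparison, the two handlers in the Real-World Example below still score high even though their variable names differ.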

## 🚀 Installation

```bash
npm install -g @aiready/pattern-detect

# Or use directly with npx
npx @aiready/pattern-detect ./src
```

## 📊 Usage

### CLI

```bash
# Basic usage
aiready-patterns ./src

# Adjust sensitivity
aiready-patterns ./src --similarity 0.9

# Only look at larger patterns
aiready-patterns ./src --min-lines 10

# Export to JSON
aiready-patterns ./src --output json --output-file report.json

# Generate HTML report
aiready-patterns ./src --output html
```

### Programmatic API

```typescript
import { analyzePatterns, generateSummary } from '@aiready/pattern-detect';

const results = await analyzePatterns({
  rootDir: './src',
  minSimilarity: 0.85, // 85% similar
  minLines: 5,
  include: ['**/*.ts', '**/*.tsx'],
  exclude: ['**/*.test.ts', '**/node_modules/**'],
});

const summary = generateSummary(results);

console.log(`Found ${summary.totalPatterns} duplicate patterns`);
console.log(`Token cost: ${summary.totalTokenCost} tokens wasted`);
console.log(`Pattern breakdown:`, summary.patternsByType);
```
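`analyzePatterns` resolves to per-file results (`fileName`, `issues`, `metrics`), and `generateSummary` reduces them to a single report. The shapes below are a sketch inferred from the bundled output in `dist/chunk-RLWJXASG.mjs`; the package does not ship these exact type names.

```typescript
// Inferred sketch of the summary shape; names like PatternSummary are not
// exported by the package and are used here only for illustration.
type PatternType =
  | 'api-handler' | 'validator' | 'utility'
  | 'class-method' | 'component' | 'function' | 'unknown';

interface PatternSummary {
  totalPatterns: number;                        // number of duplicate-pattern issues
  totalTokenCost: number;                       // summed estimated tokens wasted
  patternsByType: Record<PatternType, number>;  // counts per category
  topDuplicates: Array<{
    file1: string;
    file2: string;
    similarity: number;     // 0..1
    patternType: PatternType;
    tokenCost: number;
  }>;
}
```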

## 🔍 Real-World Example

### Before Analysis

Two API handlers that were written by AI on different days:

```typescript
// File: src/api/users.ts
app.get('/api/users/:id', async (request, response) => {
  const user = await db.users.findOne({ id: request.params.id });
  if (!user) {
    return response.status(404).json({ error: 'User not found' });
  }
  response.json(user);
});

// File: src/api/posts.ts
router.get('/posts/:id', async (req, res) => {
  const post = await database.posts.findOne({ id: req.params.id });
  if (!post) {
    res.status(404).send({ message: 'Post not found' });
    return;
  }
  res.json(post);
});
```

### Analysis Output

```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
PATTERN ANALYSIS SUMMARY
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

📁 Files analyzed: 47
⚠  Duplicate patterns found: 23
💰 Token cost (wasted): 8,450

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
PATTERNS BY TYPE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

🌐 api-handler   12
✓  validator      8
🔧 utility        3

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
TOP DUPLICATE PATTERNS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

1. 87% 🌐 api-handler
   src/api/users.ts:15
   ↔ src/api/posts.ts:22
   432 tokens wasted

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CRITICAL ISSUES (>95% similar)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

● src/utils/validators.ts:15
  validator pattern 97% similar to src/utils/checks.ts (125 tokens wasted)
  → Consolidate validation logic into shared schema validators (Zod/Yup) (CRITICAL: Nearly identical code)
```

### Suggested Refactoring

Create a generic handler:

```typescript
// utils/apiHandler.ts
export const createResourceHandler = (resourceName: string, findFn: Function) => {
  return async (req: Request, res: Response) => {
    const item = await findFn({ id: req.params.id });
    if (!item) {
      return res.status(404).json({ error: `${resourceName} not found` });
    }
    res.json(item);
  };
};

// src/api/users.ts
app.get('/api/users/:id', createResourceHandler('User', db.users.findOne));

// src/api/posts.ts
router.get('/posts/:id', createResourceHandler('Post', database.posts.findOne));
```

**Result:** Reduced from 432 tokens to ~100 tokens in AI context.

## ⚙️ Configuration

| Option | Description | Default |
|--------|-------------|---------|
| `minSimilarity` | Similarity threshold (0-1) | `0.85` |
| `minLines` | Minimum lines to consider a pattern | `5` |
| `include` | File patterns to include | `['**/*.ts', '**/*.js']` |
| `exclude` | File patterns to exclude | `['**/node_modules/**', '**/*.test.*']` |

## 📈 Understanding the Output

### Severity Levels

- **CRITICAL (>95% similar)**: Nearly identical code - refactor immediately
- **MAJOR (>90% similar)**: Very similar - refactor soon
- **MINOR (>85% similar)**: Similar - consider refactoring

### Pattern Types

- **🌐 api-handler**: REST API endpoints, route handlers
- **✓ validator**: Input validation, schema checks
- **🔧 utility**: Pure utility functions
- **📦 class-method**: Class methods with similar logic
- **⚛️ component**: UI components (React, Vue, etc.)
- **ƒ function**: Generic functions
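Categorization is heuristic: the bundled `categorizePattern` (see `dist/chunk-RLWJXASG.mjs` later in this diff) lowercases each block and checks for telltale keywords, first match wins. An abbreviated sketch of that logic:

```typescript
// Abbreviated from the published categorizePattern: the first matching rule
// wins, so each block gets exactly one pattern type.
function categorize(code: string): string {
  const lower = code.toLowerCase();
  if ((lower.includes('request') && lower.includes('response')) ||
      lower.includes('router.') || lower.includes('app.get') || lower.includes('app.post')) {
    return 'api-handler';
  }
  if (lower.includes('validate') || lower.includes('schema') ||
      lower.includes('zod') || lower.includes('yup')) {
    return 'validator';
  }
  if (lower.includes('jsx') || lower.includes('component') || lower.includes('props')) {
    return 'component';
  }
  if (lower.includes('class ') || lower.includes('this.')) {
    return 'class-method';
  }
  if (lower.includes('function') || lower.includes('=>')) {
    return 'function';
  }
  return 'unknown'; // the shipped version also has a 'utility' branch
}
```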

### Token Cost

The token cost estimates how many tokens are wasted when AI tools process duplicate code. Duplicate patterns:
- Increase context window usage
- Drive up API costs for AI-powered tools
- Slow down analysis and generation
- Create more potential for AI confusion
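As a sketch of how these numbers are produced: each reported duplicate's cost is the estimated token count of both blocks combined, and severity falls out of the similarity thresholds listed above. The real `estimateTokens` comes from `@aiready/core`; the ~4-characters-per-token heuristic below is only an assumed stand-in.

```typescript
// Sketch: deriving a duplicate's token cost and severity.
// estimateTokens is an assumed stand-in for @aiready/core's helper.
const estimateTokens = (code: string): number => Math.ceil(code.length / 4);

function scoreDuplicate(blockA: string, blockB: string, similarity: number) {
  const tokenCost = estimateTokens(blockA) + estimateTokens(blockB); // both copies count
  const severity =
    similarity > 0.95 ? 'critical' :
    similarity > 0.9  ? 'major'    : 'minor';
  return { tokenCost, severity };
}

// scoreDuplicate(a, b, 0.97).severity === 'critical'
```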

## 🎓 Best Practices

1. **Run regularly**: Integrate into CI/CD to catch new duplicates early
2. **Start with high similarity**: Use `--similarity 0.9` to find obvious wins
3. **Focus on critical issues**: Fix >95% similar patterns first
4. **Use pattern types**: Prioritize refactoring by category (API handlers → validators → utilities)
5. **Export reports**: Generate HTML reports for team reviews

## 🔧 CI/CD Integration

### GitHub Actions

```yaml
name: Pattern Detection

on: [pull_request]

jobs:
  detect-patterns:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
      - run: npx @aiready/pattern-detect ./src --output json --output-file patterns.json
      - name: Check for critical issues
        run: |
          CRITICAL=$(jq '.summary.topDuplicates | map(select(.similarity > 0.95)) | length' patterns.json)
          if [ "$CRITICAL" -gt "0" ]; then
            echo "Found $CRITICAL critical duplicate patterns"
            exit 1
          fi
```

## 🤝 Contributing

We welcome contributions! This tool is part of the [AIReady](https://github.com/aiready/aiready) ecosystem.

## 📝 License

MIT - See LICENSE file

## 🔗 Related Tools

- **[@aiready/context-analyzer](../context-analyzer)** - Analyze token costs and context fragmentation
- **[@aiready/doc-drift](../doc-drift)** - Track documentation freshness
- **[@aiready/consistency](../consistency)** - Check naming pattern consistency

---

**Made with 💙 by the AIReady team** | [Docs](https://aiready.dev/docs) | [GitHub](https://github.com/aiready/aiready)
package/dist/chunk-K5O2HVB5.mjs
ADDED
@@ -0,0 +1,114 @@
// src/index.ts
import { scanFiles, readFileContent } from "@aiready/core";

// src/detector.ts
import { similarityScore } from "@aiready/core";
function extractCodeBlocks(content, minLines) {
  const lines = content.split("\n");
  const blocks = [];
  let currentBlock = [];
  let blockStart = 0;
  let braceDepth = 0;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    for (const char of line) {
      if (char === "{") braceDepth++;
      if (char === "}") braceDepth--;
    }
    currentBlock.push(line);
    if (braceDepth === 0 && currentBlock.length >= minLines) {
      blocks.push({
        content: currentBlock.join("\n"),
        startLine: blockStart + 1
      });
      currentBlock = [];
      blockStart = i + 1;
    } else if (braceDepth === 0) {
      currentBlock = [];
      blockStart = i + 1;
    }
  }
  return blocks;
}
function normalizeCode(code) {
  return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/\s+/g, " ").trim();
}
function detectDuplicatePatterns(files, options) {
  const { minSimilarity, minLines } = options;
  const duplicates = [];
  const allBlocks = files.flatMap(
    (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
      ...block,
      file: file.file,
      normalized: normalizeCode(block.content)
    }))
  );
  for (let i = 0; i < allBlocks.length; i++) {
    for (let j = i + 1; j < allBlocks.length; j++) {
      const block1 = allBlocks[i];
      const block2 = allBlocks[j];
      if (block1.file === block2.file) continue;
      const similarity = similarityScore(block1.normalized, block2.normalized);
      if (similarity >= minSimilarity) {
        duplicates.push({
          file1: block1.file,
          file2: block2.file,
          line1: block1.startLine,
          line2: block2.startLine,
          similarity,
          snippet: block1.content.split("\n").slice(0, 3).join("\n") + "..."
        });
      }
    }
  }
  return duplicates.sort((a, b) => b.similarity - a.similarity);
}

// src/index.ts
async function analyzePatterns(options) {
  const {
    minSimilarity = 0.85,
    minLines = 5,
    ...scanOptions
  } = options;
  const files = await scanFiles(scanOptions);
  const results = [];
  const fileContents = await Promise.all(
    files.map(async (file) => ({
      file,
      content: await readFileContent(file)
    }))
  );
  const duplicates = detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines
  });
  for (const file of files) {
    const fileDuplicates = duplicates.filter(
      (dup) => dup.file1 === file || dup.file2 === file
    );
    const issues = fileDuplicates.map((dup) => ({
      type: "duplicate-pattern",
      severity: dup.similarity > 0.95 ? "critical" : "major",
      message: `Similar pattern found in ${dup.file1 === file ? dup.file2 : dup.file1}`,
      location: {
        file,
        line: dup.file1 === file ? dup.line1 : dup.line2
      },
      suggestion: "Consider extracting common logic into a shared utility"
    }));
    results.push({
      fileName: file,
      issues,
      metrics: {
        consistencyScore: 1 - fileDuplicates.length * 0.1
      }
    });
  }
  return results;
}

export {
  detectDuplicatePatterns,
  analyzePatterns
};
package/dist/chunk-RLWJXASG.mjs
ADDED
@@ -0,0 +1,227 @@
// src/index.ts
import { scanFiles, readFileContent } from "@aiready/core";

// src/detector.ts
import { similarityScore, estimateTokens } from "@aiready/core";
function categorizePattern(code) {
  const lower = code.toLowerCase();
  if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
    return "api-handler";
  }
  if (lower.includes("validate") || lower.includes("schema") || lower.includes("zod") || lower.includes("yup") || lower.includes("if") && lower.includes("throw")) {
    return "validator";
  }
  if (lower.includes("return (") || lower.includes("jsx") || lower.includes("component") || lower.includes("props")) {
    return "component";
  }
  if (lower.includes("class ") || lower.includes("this.")) {
    return "class-method";
  }
  if (lower.includes("return ") && !lower.includes("this") && !lower.includes("new ")) {
    return "utility";
  }
  if (lower.includes("function") || lower.includes("=>")) {
    return "function";
  }
  return "unknown";
}
function extractCodeBlocks(content, minLines) {
  const lines = content.split("\n");
  const blocks = [];
  let currentBlock = [];
  let blockStart = 0;
  let braceDepth = 0;
  let inFunction = false;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmed = line.trim();
    if (!inFunction && (trimmed.includes("function ") || trimmed.includes("=>") || trimmed.includes("async ") || /^(export\s+)?(async\s+)?function\s+/.test(trimmed) || /^(export\s+)?const\s+\w+\s*=\s*(async\s*)?\(/.test(trimmed))) {
      inFunction = true;
      blockStart = i;
    }
    for (const char of line) {
      if (char === "{") braceDepth++;
      if (char === "}") braceDepth--;
    }
    if (inFunction) {
      currentBlock.push(line);
    }
    if (inFunction && braceDepth === 0 && currentBlock.length >= minLines) {
      const blockContent = currentBlock.join("\n");
      const linesOfCode = currentBlock.filter(
        (l) => l.trim() && !l.trim().startsWith("//")
      ).length;
      blocks.push({
        content: blockContent,
        startLine: blockStart + 1,
        patternType: categorizePattern(blockContent),
        linesOfCode
      });
      currentBlock = [];
      inFunction = false;
    } else if (inFunction && braceDepth === 0) {
      currentBlock = [];
      inFunction = false;
    }
  }
  return blocks;
}
function normalizeCode(code) {
  return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
}
function calculateSimilarity(block1, block2) {
  const norm1 = normalizeCode(block1);
  const norm2 = normalizeCode(block2);
  const baseSimilarity = similarityScore(norm1, norm2);
  const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
  const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
  const tokenSimilarity = similarityScore(tokens1.join(" "), tokens2.join(" "));
  return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
}
function detectDuplicatePatterns(files, options) {
  const { minSimilarity, minLines } = options;
  const duplicates = [];
  const allBlocks = files.flatMap(
    (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
      ...block,
      file: file.file,
      normalized: normalizeCode(block.content),
      tokenCost: estimateTokens(block.content)
    }))
  );
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
  for (let i = 0; i < allBlocks.length; i++) {
    for (let j = i + 1; j < allBlocks.length; j++) {
      const block1 = allBlocks[i];
      const block2 = allBlocks[j];
      if (block1.file === block2.file) continue;
      const similarity = calculateSimilarity(block1.content, block2.content);
      if (similarity >= minSimilarity) {
        duplicates.push({
          file1: block1.file,
          file2: block2.file,
          line1: block1.startLine,
          line2: block2.startLine,
          similarity,
          snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
          patternType: block1.patternType,
          tokenCost: block1.tokenCost + block2.tokenCost,
          linesOfCode: block1.linesOfCode
        });
      }
    }
  }
  return duplicates.sort(
    (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
  );
}

// src/index.ts
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };
  const urgency = similarity > 0.95 ? " (CRITICAL: Nearly identical code)" : similarity > 0.9 ? " (HIGH: Very similar, refactor soon)" : "";
  return baseMessages[patternType] + urgency;
}
async function analyzePatterns(options) {
  const { minSimilarity = 0.85, minLines = 5, ...scanOptions } = options;
  const files = await scanFiles(scanOptions);
  const results = [];
  const fileContents = await Promise.all(
    files.map(async (file) => ({
      file,
      content: await readFileContent(file)
    }))
  );
  const duplicates = detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines
  });
  for (const file of files) {
    const fileDuplicates = duplicates.filter(
      (dup) => dup.file1 === file || dup.file2 === file
    );
    const issues = fileDuplicates.map((dup) => {
      const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
      const severity = dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor";
      return {
        type: "duplicate-pattern",
        severity,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: dup.file1 === file ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues,
      metrics: {
        tokenCost: totalTokenCost,
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }
  return results;
}
function generateSummary(results) {
  const allIssues = results.flatMap((r) => r.issues);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics.tokenCost || 0),
    0
  );
  const patternsByType = {
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  };
  allIssues.forEach((issue) => {
    const match = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
    if (match) {
      const type = match[1];
      patternsByType[type] = (patternsByType[type] || 0) + 1;
    }
  });
  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const similarityMatch = issue.message.match(/(\d+)% similar/);
    const tokenMatch = issue.message.match(/\((\d+) tokens/);
    const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
    const fileMatch = issue.message.match(/similar to (.+?) \(/);
    return {
      file1: issue.location.file,
      file2: fileMatch?.[1] || "unknown",
      similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
      patternType: typeMatch?.[1] || "unknown",
      tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
    };
  });
  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}

export {
  detectDuplicatePatterns,
  analyzePatterns,
  generateSummary
};
package/dist/cli.d.mts
ADDED
@@ -0,0 +1 @@
#!/usr/bin/env node
package/dist/cli.d.ts
ADDED
@@ -0,0 +1 @@
#!/usr/bin/env node