@getmikk/core 2.0.13 → 2.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/analysis/index.ts +9 -0
- package/src/analysis/taint-analysis.ts +419 -0
- package/src/analysis/type-flow.ts +247 -0
- package/src/cache/incremental-cache.ts +272 -0
- package/src/cache/index.ts +1 -0
- package/src/contract/contract-generator.ts +31 -3
- package/src/contract/lock-compiler.ts +31 -0
- package/src/contract/schema.ts +2 -0
- package/src/index.ts +2 -0
- package/src/parser/error-recovery.ts +646 -0
- package/src/parser/index.ts +29 -0
- package/src/parser/tree-sitter/parser.ts +35 -8
- package/src/security/index.ts +1 -0
- package/src/security/scanner.ts +342 -0
- package/src/utils/fs.ts +46 -15
- package/src/utils/language-registry.ts +13 -0
- package/src/utils/minimatch.ts +49 -6
|
@@ -519,13 +519,41 @@ export class TreeSitterParser extends BaseParser {
|
|
|
519
519
|
try {
|
|
520
520
|
const nameForFile = name.replace(/-/g, '_')
|
|
521
521
|
|
|
522
|
-
// Try multiple possible WASM locations
|
|
523
|
-
const
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
522
|
+
// Try multiple possible WASM locations, including parent directories and siblings for monorepos
|
|
523
|
+
const baseDirs = new Set<string>()
|
|
524
|
+
baseDirs.add(process.cwd())
|
|
525
|
+
|
|
526
|
+
// Add parent directories (up to 4 levels) for monorepo setups
|
|
527
|
+
let current = process.cwd()
|
|
528
|
+
let parentDir = ''
|
|
529
|
+
for (let i = 0; i < 4; i++) {
|
|
530
|
+
parentDir = path.dirname(current)
|
|
531
|
+
if (parentDir === current) break
|
|
532
|
+
baseDirs.add(parentDir)
|
|
533
|
+
baseDirs.add(path.join(parentDir, 'node_modules'))
|
|
534
|
+
|
|
535
|
+
// Also check sibling directories in the parent for monorepo setups
|
|
536
|
+
// (e.g., metis and Mesh are siblings under the same parent)
|
|
537
|
+
try {
|
|
538
|
+
const fs = await import('node:fs')
|
|
539
|
+
const entries = fs.readdirSync(parentDir, { withFileTypes: true })
|
|
540
|
+
for (const entry of entries) {
|
|
541
|
+
if (entry.isDirectory() && entry.name !== path.basename(current)) {
|
|
542
|
+
baseDirs.add(path.join(parentDir, entry.name, 'node_modules'))
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
} catch { /* skip */ }
|
|
546
|
+
|
|
547
|
+
current = parentDir
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
const possiblePaths: string[] = []
|
|
551
|
+
for (const baseDir of baseDirs) {
|
|
552
|
+
if (!baseDir) continue
|
|
553
|
+
possiblePaths.push(
|
|
554
|
+
path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
|
|
555
|
+
)
|
|
556
|
+
}
|
|
529
557
|
|
|
530
558
|
let wasmPath = ''
|
|
531
559
|
for (const p of possiblePaths) {
|
|
@@ -563,7 +591,6 @@ export class TreeSitterParser extends BaseParser {
|
|
|
563
591
|
|
|
564
592
|
if (!wasmPath) {
|
|
565
593
|
// WASM not found - but don't mark as permanent error, just skip this language
|
|
566
|
-
console.warn(`Tree-sitter WASM not found for ${name}`)
|
|
567
594
|
return null
|
|
568
595
|
}
|
|
569
596
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { SecurityScanner, type SecurityFinding, type SecurityReport } from './scanner.js'
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Security Vulnerability Scanning — foundation for detecting common patterns
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
/**
 * One issue reported by the scanner: a single pattern that matched a single
 * line of a single file.
 */
export interface SecurityFinding {
    /** Identifier the scanner builds as `<pattern id>-<file path>:<line>`. */
    id: string
    /** How serious the issue is, from most (`critical`) to least (`info`). */
    severity: 'critical' | 'high' | 'medium' | 'low' | 'info'
    /** Grouping label copied from the matching pattern (e.g. `injection`). */
    category: string
    /** Short human-readable name copied from the matching pattern. */
    title: string
    /** Longer explanation copied from the matching pattern. */
    description: string
    /** Path of the scanned file, exactly as it was handed to the scanner. */
    file: string
    /** 1-based line number of the matching line. */
    line: number
    /** Index of the match start within the untrimmed line, when available. */
    column?: number
    /** The matching source line with surrounding whitespace trimmed. */
    code: string
    /** Remediation hint copied from the matching pattern, if it has one. */
    suggestion?: string
    /** CWE identifier copied from the matching pattern, if it has one. */
    cwe?: string
    /** CVE identifier; the scanner in this file never sets it. */
    cve?: string
}
|
|
19
|
+
|
|
20
|
+
/**
 * Aggregated outcome of scanning a batch of files.
 */
export interface SecurityReport {
    /** Every finding from every scanned file, ordered from `critical` down to `info`. */
    findings: SecurityFinding[]
    /** Number of findings overall and per severity level. */
    summary: {
        total: number
        critical: number
        high: number
        medium: number
        low: number
        info: number
    }
    /** How many files were handed to the scan. */
    scannedFiles: number
    /** Elapsed scan time, measured as a difference of `Date.now()` values (milliseconds). */
    scanDuration: number
}
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Pattern definitions for common vulnerability categories
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * A single regex-based rule. The scanner tests `regex` against each line of a
 * file and copies the descriptive fields onto the finding when it matches.
 */
interface VulnerabilityPattern {
    /** Rule identifier; becomes the prefix of each finding's `id`. */
    id: string
    /** Severity assigned to every finding produced by this rule. */
    severity: SecurityFinding['severity']
    /** Grouping label (e.g. `injection`, `secrets`, `crypto`). */
    category: string
    /** Short human-readable name of the issue. */
    title: string
    /** Longer explanation of why the match is a problem. */
    description: string
    /** Expression tested against one source line at a time. */
    regex: RegExp
    /** Optional remediation hint. */
    suggestion?: string
    /** Optional CWE identifier such as `CWE-89`. */
    cwe?: string
    /**
     * Languages the rule applies to. The rule is skipped only when both this
     * list and the scanned file's language are given and the language is not
     * in the list; without a list the rule applies to every file.
     */
    languages?: string[]
}
|
|
49
|
+
|
|
50
|
+
/**
 * Built-in rule set used when a scanner is created without custom patterns.
 *
 * Every rule is a line-level regular expression, so matches are heuristics:
 * they flag code worth reviewing rather than proving a vulnerability.
 */
const VULNERABILITY_PATTERNS: VulnerabilityPattern[] = [
    // SQL Injection
    {
        id: 'sql-injection',
        severity: 'critical',
        category: 'injection',
        title: 'Potential SQL Injection',
        description: 'String concatenation in SQL query detected. Use parameterized queries instead.',
        regex: /(?:execute|query|cursor\.execute)\s*\(\s*["'].*(?:\+|\$\{)/,
        suggestion: 'Use parameterized queries: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))',
        cwe: 'CWE-89',
        languages: ['python', 'javascript', 'typescript'],
    },
    {
        id: 'sql-injection-fstring',
        severity: 'critical',
        category: 'injection',
        title: 'SQL Injection via f-string',
        description: 'f-string used in SQL query. Use parameterized queries.',
        regex: /(?:execute|query)\s*\(\s*f["']/,
        suggestion: 'Use parameterized queries instead of f-strings in SQL.',
        cwe: 'CWE-89',
        languages: ['python'],
    },

    // Command Injection
    {
        id: 'command-injection',
        severity: 'critical',
        category: 'injection',
        title: 'Potential Command Injection',
        description: 'User input may be passed to shell command. Use subprocess with list args instead.',
        regex: /(?:os\.system|subprocess\.call|subprocess\.Popen|exec|eval)\s*\(\s*(?:.*\+|.*\$\{)/,
        suggestion: 'Use subprocess.run() with a list of arguments instead of shell=True.',
        cwe: 'CWE-78',
        languages: ['python'],
    },
    {
        id: 'eval-usage',
        severity: 'high',
        category: 'injection',
        title: 'Use of eval()',
        description: 'eval() can execute arbitrary code. Use ast.literal_eval() for safe parsing.',
        regex: /\beval\s*\(/,
        suggestion: 'Use ast.literal_eval() for parsing Python literals, or json.loads() for JSON.',
        cwe: 'CWE-95',
        languages: ['python', 'javascript', 'typescript'],
    },

    // Hardcoded Secrets (no `languages` list: applied to every file)
    {
        id: 'hardcoded-password',
        severity: 'high',
        category: 'secrets',
        title: 'Hardcoded Password',
        description: 'Password appears to be hardcoded in source code.',
        regex: /(?:password|passwd|pwd)\s*[:=]\s*["'][^"']{3,}["']/i,
        suggestion: 'Use environment variables or a secrets manager.',
        cwe: 'CWE-798',
    },
    {
        id: 'hardcoded-api-key',
        severity: 'high',
        category: 'secrets',
        title: 'Hardcoded API Key',
        description: 'API key or token appears to be hardcoded.',
        regex: /(?:api[_-]?key|api[_-]?secret|access[_-]?token|auth[_-]?token)\s*[:=]\s*["'][A-Za-z0-9_-]{8,}["']/i,
        suggestion: 'Use environment variables or a secrets manager.',
        cwe: 'CWE-798',
    },
    {
        id: 'aws-key',
        severity: 'critical',
        category: 'secrets',
        title: 'AWS Access Key',
        description: 'AWS access key pattern detected.',
        regex: /AKIA[0-9A-Z]{16}/,
        suggestion: 'Remove AWS credentials from source code. Use IAM roles or environment variables.',
        cwe: 'CWE-798',
    },
    {
        id: 'private-key',
        severity: 'critical',
        category: 'secrets',
        title: 'Private Key',
        description: 'Private key content detected in source code.',
        // Covers the unlabelled PKCS#8 header plus the RSA, DSA, EC, OPENSSH and
        // ENCRYPTED variants; previously only the bare and RSA headers matched.
        regex: /-----BEGIN\s+(?:(?:RSA|DSA|EC|OPENSSH|ENCRYPTED)\s+)?PRIVATE\s+KEY-----/,
        suggestion: 'Never embed private keys in source code. Use a secrets manager.',
        cwe: 'CWE-798',
    },

    // XSS
    {
        id: 'xss-innerhtml',
        severity: 'high',
        category: 'xss',
        title: 'Potential XSS via innerHTML',
        description: 'Setting innerHTML with dynamic content can lead to XSS.',
        regex: /\.innerHTML\s*=\s*(?!["']\s*;?\s*$)/,
        suggestion: 'Use textContent or sanitize HTML with DOMPurify.',
        cwe: 'CWE-79',
        languages: ['javascript', 'typescript'],
    },
    {
        id: 'xss-dangerouslySetInnerHTML',
        severity: 'high',
        category: 'xss',
        title: 'Potential XSS via dangerouslySetInnerHTML',
        description: 'dangerouslySetInnerHTML with dynamic content can lead to XSS.',
        regex: /dangerouslySetInnerHTML\s*=\s*\{\{?\s*__html\s*:/,
        suggestion: 'Sanitize HTML content with DOMPurify before using dangerouslySetInnerHTML.',
        cwe: 'CWE-79',
        languages: ['javascript', 'typescript'],
    },

    // Insecure Random
    {
        id: 'insecure-random',
        severity: 'medium',
        category: 'crypto',
        title: 'Insecure Random Number Generator',
        description: 'Math.random() is not cryptographically secure.',
        regex: /Math\.random\s*\(\)/,
        suggestion: 'Use crypto.getRandomValues() for security-sensitive operations.',
        cwe: 'CWE-330',
        languages: ['javascript', 'typescript'],
    },

    // Path Traversal
    {
        id: 'path-traversal',
        severity: 'high',
        category: 'path-traversal',
        title: 'Potential Path Traversal',
        description: 'User input used in file path without sanitization.',
        regex: /(?:readFile|readFileSync|open|writeFile|writeFileSync)\s*\(\s*(?:.*\+|.*\$\{)/,
        suggestion: 'Validate and sanitize file paths. Use path.resolve() with a whitelist.',
        cwe: 'CWE-22',
        languages: ['javascript', 'typescript', 'python'],
    },

    // Weak Cryptography (no `languages` list: applied to every file)
    {
        id: 'weak-hash-md5',
        severity: 'medium',
        category: 'crypto',
        title: 'Weak Hashing Algorithm (MD5)',
        description: 'MD5 is cryptographically broken. Use SHA-256 or better.',
        regex: /(?:md5|MD5|hashlib\.md5)/,
        suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
        cwe: 'CWE-328',
    },
    {
        id: 'weak-hash-sha1',
        severity: 'medium',
        category: 'crypto',
        title: 'Weak Hashing Algorithm (SHA-1)',
        description: 'SHA-1 is deprecated for cryptographic use. Use SHA-256 or better.',
        regex: /(?:sha1|SHA1|hashlib\.sha1)/,
        suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
        cwe: 'CWE-328',
    },

    // Debug/Console in Production
    {
        id: 'console-log',
        severity: 'info',
        category: 'best-practice',
        title: 'Console Log Statement',
        description: 'Console.log statements should be removed before production.',
        regex: /console\.(log|debug|info|warn)\s*\(/,
        suggestion: 'Use a proper logging framework and remove debug statements.',
        languages: ['javascript', 'typescript'],
    },
    {
        id: 'print-debug',
        severity: 'info',
        category: 'best-practice',
        title: 'Print Debug Statement',
        description: 'Print statements should be removed before production.',
        regex: /print\s*\(\s*["'][^"']*["']\s*\)/,
        suggestion: 'Use the logging module instead of print statements.',
        languages: ['python'],
    },

    // TODO comments (only TODO is matched; FIXME and HACK are not)
    {
        id: 'todo-comment',
        severity: 'info',
        category: 'best-practice',
        title: 'TODO Comment',
        description: 'TODO comment found. Consider addressing this.',
        regex: /\/\/\s*TODO|\/\*\s*TODO|#\s*TODO/i,
        languages: ['javascript', 'typescript', 'python', 'go', 'java', 'rust'],
    },
]
|
|
246
|
+
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
// Scanner
|
|
249
|
+
// ---------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+
/**
 * Line-by-line, regex-driven scanner.
 *
 * Each configured pattern is tested against every line of a file; a match
 * yields one finding for that (pattern, line) pair. Matches are heuristics and
 * are not verified against the surrounding code.
 */
export class SecurityScanner {
    /** Sort rank per severity; lower ranks come first in a report. */
    private static readonly SEVERITY_RANK: Record<SecurityFinding['severity'], number> = {
        critical: 0,
        high: 1,
        medium: 2,
        low: 3,
        info: 4,
    }

    private patterns: VulnerabilityPattern[]

    /**
     * @param customPatterns Rules to use instead of the built-in set. When
     *   omitted, the built-in patterns are used.
     */
    constructor(customPatterns?: VulnerabilityPattern[]) {
        // Copy so that addPattern() never mutates the shared built-in list or
        // the caller's own array.
        this.patterns = [...(customPatterns ?? VULNERABILITY_PATTERNS)]
    }

    /**
     * Scan a single file's content for security issues.
     *
     * @param filePath Path recorded on each finding; the file is not read.
     * @param content Full text of the file; it is split on `\n`.
     * @param language Optional language name. When given, patterns that list
     *   `languages` are skipped unless the list contains it.
     * @returns Findings grouped by pattern (in pattern order), then by line.
     *   `line` is 1-based; `column` is the match's index in the untrimmed line.
     */
    scanFile(filePath: string, content: string, language?: string): SecurityFinding[] {
        const findings: SecurityFinding[] = []
        const lines = content.split('\n')

        for (const pattern of this.patterns) {
            // Skip if language filter doesn't match
            if (pattern.languages && language && !pattern.languages.includes(language)) {
                continue
            }

            lines.forEach((line, index) => {
                // exec() with lastIndex reset behaves the same for plain, global
                // and sticky expressions: every line is searched from its start
                // and the match always carries an index.
                pattern.regex.lastIndex = 0
                const match = pattern.regex.exec(line)
                pattern.regex.lastIndex = 0
                if (!match) return

                const lineNumber = index + 1
                findings.push({
                    id: `${pattern.id}-${filePath}:${lineNumber}`,
                    severity: pattern.severity,
                    category: pattern.category,
                    title: pattern.title,
                    description: pattern.description,
                    file: filePath,
                    line: lineNumber,
                    column: match.index,
                    code: line.trim(),
                    suggestion: pattern.suggestion,
                    cwe: pattern.cwe,
                })
            })
        }

        return findings
    }

    /**
     * Scan multiple files.
     *
     * @param files Path, content and optional language of each file.
     * @returns All findings ordered by severity (most severe first), per-severity
     *   counts, the number of files given, and the elapsed time in milliseconds.
     */
    scanFiles(
        files: Array<{ path: string; content: string; language?: string }>
    ): SecurityReport {
        const startTime = Date.now()

        // flatMap instead of push(...findings): spreading a very large array
        // into call arguments can exceed the engine's argument limit.
        const allFindings = files.flatMap(file =>
            this.scanFile(file.path, file.content, file.language)
        )

        const counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }
        for (const { severity } of allFindings) {
            counts[severity] += 1
        }

        const rank = SecurityScanner.SEVERITY_RANK
        allFindings.sort((a, b) => rank[a.severity] - rank[b.severity])

        return {
            findings: allFindings,
            summary: { total: allFindings.length, ...counts },
            scannedFiles: files.length,
            scanDuration: Date.now() - startTime,
        }
    }

    /**
     * Add custom vulnerability patterns.
     *
     * @param pattern Rule appended to this scanner's own pattern list.
     */
    addPattern(pattern: VulnerabilityPattern): void {
        this.patterns.push(pattern)
    }

    /**
     * Get all available patterns.
     *
     * @returns A shallow copy of this scanner's pattern list.
     */
    getPatterns(): VulnerabilityPattern[] {
        return [...this.patterns]
    }
}
|
package/src/utils/fs.ts
CHANGED
|
@@ -276,7 +276,7 @@ function inferContextFileType(filePath: string): ContextFileType {
|
|
|
276
276
|
}
|
|
277
277
|
|
|
278
278
|
/** Recognised project language */
|
|
279
|
-
export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown'
|
|
279
|
+
export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown' | 'polyglot'
|
|
280
280
|
|
|
281
281
|
/** Auto-detect the project's primary language from manifest files */
|
|
282
282
|
export async function detectProjectLanguage(projectRoot: string): Promise<ProjectLanguage> {
|
|
@@ -310,24 +310,28 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
|
|
|
310
310
|
]
|
|
311
311
|
|
|
312
312
|
const toPatterns = (lang: ProjectLanguage): string[] => {
|
|
313
|
-
|
|
313
|
+
if (lang === 'polyglot') {
|
|
314
|
+
// For polyglot, use LANGUAGE_EXTENSIONS.polyglot directly
|
|
315
|
+
return getDiscoveryExtensions('polyglot' as any).map(ext => `**/*${ext}`)
|
|
316
|
+
}
|
|
317
|
+
return getDiscoveryExtensions(lang as any).map(ext => `**/*${ext}`)
|
|
314
318
|
}
|
|
315
319
|
|
|
316
320
|
switch (language) {
|
|
317
321
|
case 'typescript':
|
|
318
322
|
return {
|
|
319
323
|
patterns: toPatterns(language),
|
|
320
|
-
ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
|
|
324
|
+
ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
|
|
321
325
|
}
|
|
322
326
|
case 'javascript':
|
|
323
327
|
return {
|
|
324
328
|
patterns: toPatterns(language),
|
|
325
|
-
ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
|
|
329
|
+
ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
|
|
326
330
|
}
|
|
327
331
|
case 'python':
|
|
328
332
|
return {
|
|
329
333
|
patterns: toPatterns(language),
|
|
330
|
-
ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py'],
|
|
334
|
+
ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py', '**/lib/site-packages/**'],
|
|
331
335
|
}
|
|
332
336
|
case 'go':
|
|
333
337
|
return {
|
|
@@ -352,34 +356,47 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
|
|
|
352
356
|
case 'ruby':
|
|
353
357
|
return {
|
|
354
358
|
patterns: toPatterns(language),
|
|
355
|
-
ignore: [...commonIgnore, '**/vendor/**', '
|
|
359
|
+
ignore: [...commonIgnore, '**/vendor/**', '**/*.gemspec'],
|
|
356
360
|
}
|
|
357
361
|
case 'php':
|
|
358
362
|
return {
|
|
359
363
|
patterns: toPatterns(language),
|
|
360
|
-
ignore: [...commonIgnore, '**/vendor/**', '
|
|
364
|
+
ignore: [...commonIgnore, '**/vendor/**', '**/tests/**', '**/Test*.php'],
|
|
361
365
|
}
|
|
362
366
|
case 'csharp':
|
|
363
367
|
return {
|
|
364
368
|
patterns: toPatterns(language),
|
|
365
|
-
ignore: [...commonIgnore, '**/bin/**', '**/obj/**'],
|
|
369
|
+
ignore: [...commonIgnore, '**/bin/**', '**/obj/**', '**/*Test.cs'],
|
|
366
370
|
}
|
|
367
|
-
case '
|
|
371
|
+
case 'c':
|
|
368
372
|
return {
|
|
369
373
|
patterns: toPatterns(language),
|
|
370
|
-
ignore: [...commonIgnore, '
|
|
374
|
+
ignore: [...commonIgnore, '**/*.h'],
|
|
371
375
|
}
|
|
372
|
-
case '
|
|
376
|
+
case 'cpp':
|
|
373
377
|
return {
|
|
374
378
|
patterns: toPatterns(language),
|
|
375
|
-
ignore: [...commonIgnore, '**/build/**'],
|
|
379
|
+
ignore: [...commonIgnore, '**/build/**', '**/*.hpp'],
|
|
376
380
|
}
|
|
377
|
-
|
|
378
|
-
// Fallback: discover JS/TS (most common)
|
|
381
|
+
case 'polyglot':
|
|
379
382
|
return {
|
|
380
383
|
patterns: toPatterns(language),
|
|
381
|
-
ignore: [
|
|
384
|
+
ignore: [
|
|
385
|
+
...commonIgnore,
|
|
386
|
+
'**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**',
|
|
387
|
+
'**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/lib/site-packages/**',
|
|
388
|
+
'**/vendor/**', '**/target/**', '**/.gradle/**', '**/.build/**', '**/bin/**', '**/obj/**',
|
|
389
|
+
'**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}',
|
|
390
|
+
'**/test_*.py', '**/*_test.py', '**/Test*.java', '**/*Test.java', '**/*Test.cs',
|
|
391
|
+
],
|
|
382
392
|
}
|
|
393
|
+
case 'unknown':
|
|
394
|
+
return {
|
|
395
|
+
patterns: ['**/*.{ts,tsx,js,jsx}'],
|
|
396
|
+
ignore: [...commonIgnore, '**/node_modules/**'],
|
|
397
|
+
}
|
|
398
|
+
default:
|
|
399
|
+
return { patterns: [], ignore: commonIgnore }
|
|
383
400
|
}
|
|
384
401
|
}
|
|
385
402
|
|
|
@@ -625,6 +642,20 @@ const LANGUAGE_IGNORE_TEMPLATES: Record<ProjectLanguage, string[]> = {
|
|
|
625
642
|
'__tests__/',
|
|
626
643
|
'',
|
|
627
644
|
],
|
|
645
|
+
polyglot: [
|
|
646
|
+
'# Multi-language project',
|
|
647
|
+
'**/node_modules/**',
|
|
648
|
+
'**/venv/**',
|
|
649
|
+
'**/.venv/**',
|
|
650
|
+
'**/__pycache__/**',
|
|
651
|
+
'**/site-packages/**',
|
|
652
|
+
'**/vendor/**',
|
|
653
|
+
'**/target/**',
|
|
654
|
+
'**/build/**',
|
|
655
|
+
'**/dist/**',
|
|
656
|
+
'**/.next/**',
|
|
657
|
+
'',
|
|
658
|
+
],
|
|
628
659
|
}
|
|
629
660
|
|
|
630
661
|
/**
|
|
@@ -14,6 +14,7 @@ export type RegistryLanguage =
|
|
|
14
14
|
| 'csharp'
|
|
15
15
|
| 'c'
|
|
16
16
|
| 'cpp'
|
|
17
|
+
| 'polyglot'
|
|
17
18
|
| 'unknown'
|
|
18
19
|
|
|
19
20
|
const OXC_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'] as const
|
|
@@ -44,6 +45,18 @@ const LANGUAGE_EXTENSIONS: Record<RegistryLanguage, readonly string[]> = {
|
|
|
44
45
|
csharp: ['.cs'],
|
|
45
46
|
c: ['.c', '.h'],
|
|
46
47
|
cpp: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh', '.h'],
|
|
48
|
+
polyglot: [
|
|
49
|
+
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
|
|
50
|
+
'.py',
|
|
51
|
+
'.go',
|
|
52
|
+
'.rs',
|
|
53
|
+
'.java', '.kt', '.kts',
|
|
54
|
+
'.swift',
|
|
55
|
+
'.rb',
|
|
56
|
+
'.php',
|
|
57
|
+
'.cs',
|
|
58
|
+
'.c', '.h', '.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh',
|
|
59
|
+
],
|
|
47
60
|
unknown: ['.ts', '.tsx', '.js', '.jsx'],
|
|
48
61
|
}
|
|
49
62
|
|
package/src/utils/minimatch.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Rules:
|
|
5
5
|
* - Pattern with no glob chars (*, ?, {, [) → directory prefix match
|
|
6
6
|
* "src/auth" matches "src/auth/jwt.ts" and "src/auth" itself
|
|
7
|
-
* - "**" matches any depth
|
|
7
|
+
* - "**" matches any depth (zero or more directory segments)
|
|
8
8
|
* - "*" matches within a single directory segment
|
|
9
9
|
*/
|
|
10
10
|
export function minimatch(filePath: string, pattern: string): boolean {
|
|
@@ -17,12 +17,55 @@ export function minimatch(filePath: string, pattern: string): boolean {
|
|
|
17
17
|
return normalizedPath === bare || normalizedPath.startsWith(bare + '/')
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
//
|
|
21
|
-
|
|
20
|
+
// Handle patterns that start with ** - these should match anywhere in the path
|
|
21
|
+
// e.g., **/venv/** should match if there's a /venv/ segment anywhere
|
|
22
|
+
if (normalizedPattern.startsWith('**/')) {
|
|
23
|
+
const rest = normalizedPattern.slice(3) // Remove **/
|
|
24
|
+
// Check if the rest of the pattern appears as a path segment
|
|
25
|
+
// For **/venv/**, check if /venv/ is in the path
|
|
26
|
+
// For **/node_modules/**, check if /node_modules/ is in the path
|
|
27
|
+
const segments = normalizedPath.split('/')
|
|
28
|
+
const patternSegments = rest.split('/').filter(Boolean)
|
|
29
|
+
|
|
30
|
+
// Check if pattern segments appear consecutively in path
|
|
31
|
+
for (let i = 0; i <= segments.length - patternSegments.length; i++) {
|
|
32
|
+
let match = true
|
|
33
|
+
for (let j = 0; j < patternSegments.length; j++) {
|
|
34
|
+
const pseg = patternSegments[j].replace(/\*/g, '[^/]*')
|
|
35
|
+
if (!new RegExp('^' + pseg + '$', 'i').test(segments[i + j])) {
|
|
36
|
+
match = false
|
|
37
|
+
break
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
if (match) return true
|
|
41
|
+
}
|
|
42
|
+
return false
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Convert glob to regex (for patterns not starting with **)
|
|
46
|
+
let regexStr = normalizedPattern
|
|
47
|
+
.replace(/\[/g, '\\[')
|
|
48
|
+
.replace(/\]/g, '\\]')
|
|
22
49
|
.replace(/\./g, '\\.')
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
50
|
+
|
|
51
|
+
// Replace **/ at end with (?:[^/]+/)* - matches zero or more dir segments ending with /
|
|
52
|
+
// But we need to handle path/** specifically - matching path/file, path/dir/file, etc.
|
|
53
|
+
|
|
54
|
+
// Handle trailing /** specifically - should match path itself and anything under it
|
|
55
|
+
if (normalizedPattern.endsWith('/**')) {
|
|
56
|
+
const base = normalizedPattern.slice(0, -3) // Remove /**
|
|
57
|
+
// Match either exact base or base + anything
|
|
58
|
+
return normalizedPath === base || normalizedPath.startsWith(base + '/')
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Replace **/ with (?:[^/]+/)* - matches zero or more directory segments
|
|
62
|
+
regexStr = regexStr.replace(/\*\*\//g, '(?:[^/]+/)*')
|
|
63
|
+
// Replace trailing ** with (?:[^/]+/)*[^/]+ - matches zero or more at end
|
|
64
|
+
regexStr = regexStr.replace(/\*\*$/g, '(?:[^/]+/)*[^/]+')
|
|
65
|
+
// Standalone **
|
|
66
|
+
regexStr = regexStr.replace(/\*\*/g, '(?:[^/]+/)*[^/]+')
|
|
67
|
+
// Single * matches any characters except slash
|
|
68
|
+
regexStr = regexStr.replace(/\*/g, '[^/]*')
|
|
26
69
|
|
|
27
70
|
return new RegExp(`^${regexStr}$`, 'i').test(normalizedPath)
|
|
28
71
|
}
|