@getmikk/core 2.0.13 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -519,13 +519,41 @@ export class TreeSitterParser extends BaseParser {
519
519
  try {
520
520
  const nameForFile = name.replace(/-/g, '_')
521
521
 
522
- // Try multiple possible WASM locations
523
- const possiblePaths = [
524
- path.resolve('node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
525
- path.resolve('./node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
526
- path.resolve(process.cwd(), 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
527
- path.resolve(process.cwd(), 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`),
528
- ]
522
+ // Try multiple possible WASM locations, including parent directories and siblings for monorepos
523
+ const baseDirs = new Set<string>()
524
+ baseDirs.add(process.cwd())
525
+
526
+ // Add parent directories (up to 4 levels) for monorepo setups
527
+ let current = process.cwd()
528
+ let parentDir = ''
529
+ for (let i = 0; i < 4; i++) {
530
+ parentDir = path.dirname(current)
531
+ if (parentDir === current) break
532
+ baseDirs.add(parentDir)
533
+ baseDirs.add(path.join(parentDir, 'node_modules'))
534
+
535
+ // Also check sibling directories in the parent for monorepo setups
536
+ // (e.g., metis and Mesh are siblings under the same parent)
537
+ try {
538
+ const fs = await import('node:fs')
539
+ const entries = fs.readdirSync(parentDir, { withFileTypes: true })
540
+ for (const entry of entries) {
541
+ if (entry.isDirectory() && entry.name !== path.basename(current)) {
542
+ baseDirs.add(path.join(parentDir, entry.name, 'node_modules'))
543
+ }
544
+ }
545
+ } catch { /* skip */ }
546
+
547
+ current = parentDir
548
+ }
549
+
550
+ const possiblePaths: string[] = []
551
+ for (const baseDir of baseDirs) {
552
+ if (!baseDir) continue
553
+ possiblePaths.push(
554
+ path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
555
+ )
556
+ }
529
557
 
530
558
  let wasmPath = ''
531
559
  for (const p of possiblePaths) {
@@ -563,7 +591,6 @@ export class TreeSitterParser extends BaseParser {
563
591
 
564
592
  if (!wasmPath) {
565
593
  // WASM not found - but don't mark as permanent error, just skip this language
566
- console.warn(`Tree-sitter WASM not found for ${name}`)
567
594
  return null
568
595
  }
569
596
 
@@ -0,0 +1 @@
1
+ export { SecurityScanner, type SecurityFinding, type SecurityReport } from './scanner.js'
@@ -0,0 +1,342 @@
1
// ---------------------------------------------------------------------------
// Security Vulnerability Scanning — foundation for detecting common patterns
// ---------------------------------------------------------------------------

/** A single pattern hit produced by {@link SecurityScanner.scanFile}. */
export interface SecurityFinding {
  /** Built as `<pattern id>-<file path>:<1-based line>`. */
  id: string
  severity: 'critical' | 'high' | 'medium' | 'low' | 'info'
  category: string
  title: string
  description: string
  /** File path exactly as passed to the scanner. */
  file: string
  /** 1-based line number of the matching line. */
  line: number
  /** 0-based offset of the match within the untrimmed line. */
  column?: number
  /** The matching line with surrounding whitespace trimmed. */
  code: string
  suggestion?: string
  cwe?: string
  cve?: string
}

/** Aggregated result of {@link SecurityScanner.scanFiles}. */
export interface SecurityReport {
  /** All findings, ordered from most to least severe. */
  findings: SecurityFinding[]
  /** Number of findings overall and per severity level. */
  summary: {
    total: number
    critical: number
    high: number
    medium: number
    low: number
    info: number
  }
  /** Number of file entries that were passed in. */
  scannedFiles: number
  /** Wall-clock duration of the scan in milliseconds. */
  scanDuration: number
}

// ---------------------------------------------------------------------------
// Pattern definitions for common vulnerability categories
// ---------------------------------------------------------------------------

/** One line-based regular-expression rule. */
interface VulnerabilityPattern {
  id: string
  severity: SecurityFinding['severity']
  category: string
  title: string
  description: string
  /** Tested against each line of a file individually (never across lines). */
  regex: RegExp
  suggestion?: string
  cwe?: string
  /** When set, the rule is skipped for files whose known language is not listed. */
  languages?: string[]
}

const VULNERABILITY_PATTERNS: VulnerabilityPattern[] = [
  // SQL Injection
  {
    id: 'sql-injection',
    severity: 'critical',
    category: 'injection',
    title: 'Potential SQL Injection',
    description: 'String concatenation in SQL query detected. Use parameterized queries instead.',
    regex: /(?:execute|query|cursor\.execute)\s*\(\s*["'].*(?:\+|\$\{)/,
    suggestion: 'Use parameterized queries: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))',
    cwe: 'CWE-89',
    languages: ['python', 'javascript', 'typescript'],
  },
  {
    id: 'sql-injection-fstring',
    severity: 'critical',
    category: 'injection',
    title: 'SQL Injection via f-string',
    description: 'f-string used in SQL query. Use parameterized queries.',
    regex: /(?:execute|query)\s*\(\s*f["']/,
    suggestion: 'Use parameterized queries instead of f-strings in SQL.',
    cwe: 'CWE-89',
    languages: ['python'],
  },

  // Command Injection
  {
    id: 'command-injection',
    severity: 'critical',
    category: 'injection',
    title: 'Potential Command Injection',
    description: 'User input may be passed to shell command. Use subprocess with list args instead.',
    regex: /(?:os\.system|subprocess\.call|subprocess\.Popen|exec|eval)\s*\(\s*(?:.*\+|.*\$\{)/,
    suggestion: 'Use subprocess.run() with a list of arguments instead of shell=True.',
    cwe: 'CWE-78',
    languages: ['python'],
  },
  {
    id: 'eval-usage',
    severity: 'high',
    category: 'injection',
    title: 'Use of eval()',
    description: 'eval() can execute arbitrary code. Use ast.literal_eval() for safe parsing.',
    regex: /\beval\s*\(/,
    suggestion: 'Use ast.literal_eval() for parsing Python literals, or json.loads() for JSON.',
    cwe: 'CWE-95',
    languages: ['python', 'javascript', 'typescript'],
  },

  // Hardcoded Secrets
  {
    id: 'hardcoded-password',
    severity: 'high',
    category: 'secrets',
    title: 'Hardcoded Password',
    description: 'Password appears to be hardcoded in source code.',
    regex: /(?:password|passwd|pwd)\s*[:=]\s*["'][^"']{3,}["']/i,
    suggestion: 'Use environment variables or a secrets manager.',
    cwe: 'CWE-798',
  },
  {
    id: 'hardcoded-api-key',
    severity: 'high',
    category: 'secrets',
    title: 'Hardcoded API Key',
    description: 'API key or token appears to be hardcoded.',
    regex: /(?:api[_-]?key|api[_-]?secret|access[_-]?token|auth[_-]?token)\s*[:=]\s*["'][A-Za-z0-9_-]{8,}["']/i,
    suggestion: 'Use environment variables or a secrets manager.',
    cwe: 'CWE-798',
  },
  {
    id: 'aws-key',
    severity: 'critical',
    category: 'secrets',
    title: 'AWS Access Key',
    description: 'AWS access key pattern detected.',
    regex: /AKIA[0-9A-Z]{16}/,
    suggestion: 'Remove AWS credentials from source code. Use IAM roles or environment variables.',
    cwe: 'CWE-798',
  },
  {
    id: 'private-key',
    severity: 'critical',
    category: 'secrets',
    title: 'Private Key',
    description: 'Private key content detected in source code.',
    // Covers the unlabelled PKCS#8 header plus the common labelled PEM variants,
    // not only "RSA PRIVATE KEY".
    regex: /-----BEGIN\s+(?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED)\s+)?PRIVATE\s+KEY-----/,
    suggestion: 'Never embed private keys in source code. Use a secrets manager.',
    cwe: 'CWE-798',
  },

  // XSS
  {
    id: 'xss-innerhtml',
    severity: 'high',
    category: 'xss',
    title: 'Potential XSS via innerHTML',
    description: 'Setting innerHTML with dynamic content can lead to XSS.',
    // `(?!=)` keeps `==` / `===` comparisons from being reported as assignments;
    // the second lookahead skips clearing an element with an empty string literal.
    regex: /\.innerHTML\s*=(?!=)(?!\s*(?:""|''|``)\s*;?\s*$)/,
    suggestion: 'Use textContent or sanitize HTML with DOMPurify.',
    cwe: 'CWE-79',
    languages: ['javascript', 'typescript'],
  },
  {
    id: 'xss-dangerouslySetInnerHTML',
    severity: 'high',
    category: 'xss',
    title: 'Potential XSS via dangerouslySetInnerHTML',
    description: 'dangerouslySetInnerHTML with dynamic content can lead to XSS.',
    regex: /dangerouslySetInnerHTML\s*=\s*\{\{?\s*__html\s*:/,
    suggestion: 'Sanitize HTML content with DOMPurify before using dangerouslySetInnerHTML.',
    cwe: 'CWE-79',
    languages: ['javascript', 'typescript'],
  },

  // Insecure Random
  {
    id: 'insecure-random',
    severity: 'medium',
    category: 'crypto',
    title: 'Insecure Random Number Generator',
    description: 'Math.random() is not cryptographically secure.',
    regex: /Math\.random\s*\(\)/,
    suggestion: 'Use crypto.getRandomValues() for security-sensitive operations.',
    cwe: 'CWE-330',
    languages: ['javascript', 'typescript'],
  },

  // Path Traversal
  {
    id: 'path-traversal',
    severity: 'high',
    category: 'path-traversal',
    title: 'Potential Path Traversal',
    description: 'User input used in file path without sanitization.',
    regex: /(?:readFile|readFileSync|open|writeFile|writeFileSync)\s*\(\s*(?:.*\+|.*\$\{)/,
    suggestion: 'Validate and sanitize file paths. Use path.resolve() with a whitelist.',
    cwe: 'CWE-22',
    languages: ['javascript', 'typescript', 'python'],
  },

  // Weak Cryptography
  {
    id: 'weak-hash-md5',
    severity: 'medium',
    category: 'crypto',
    title: 'Weak Hashing Algorithm (MD5)',
    description: 'MD5 is cryptographically broken. Use SHA-256 or better.',
    regex: /(?:md5|MD5|hashlib\.md5)/,
    suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
    cwe: 'CWE-328',
  },
  {
    id: 'weak-hash-sha1',
    severity: 'medium',
    category: 'crypto',
    title: 'Weak Hashing Algorithm (SHA-1)',
    description: 'SHA-1 is deprecated for cryptographic use. Use SHA-256 or better.',
    regex: /(?:sha1|SHA1|hashlib\.sha1)/,
    suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
    cwe: 'CWE-328',
  },

  // Debug/Console in Production
  {
    id: 'console-log',
    severity: 'info',
    category: 'best-practice',
    title: 'Console Log Statement',
    description: 'Console.log statements should be removed before production.',
    regex: /console\.(log|debug|info|warn)\s*\(/,
    suggestion: 'Use a proper logging framework and remove debug statements.',
    languages: ['javascript', 'typescript'],
  },
  {
    id: 'print-debug',
    severity: 'info',
    category: 'best-practice',
    title: 'Print Debug Statement',
    description: 'Print statements should be removed before production.',
    regex: /print\s*\(\s*["'][^"']*["']\s*\)/,
    suggestion: 'Use the logging module instead of print statements.',
    languages: ['python'],
  },

  // TODO comments (only the TODO marker is matched)
  {
    id: 'todo-comment',
    severity: 'info',
    category: 'best-practice',
    title: 'TODO Comment',
    description: 'TODO comment found. Consider addressing this.',
    regex: /\/\/\s*TODO|\/\*\s*TODO|#\s*TODO/i,
    languages: ['javascript', 'typescript', 'python', 'go', 'java', 'rust'],
  },
]

/** Sort rank per severity; a lower rank is reported first. */
const SEVERITY_RANK: Record<SecurityFinding['severity'], number> = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3,
  info: 4,
}

// ---------------------------------------------------------------------------
// Scanner
// ---------------------------------------------------------------------------

/**
 * Line-by-line regular-expression scanner.
 *
 * Every pattern is tested against each line in isolation, so constructs that
 * span several lines are not detected. Findings are heuristic hints, not proof.
 */
export class SecurityScanner {
  private patterns: VulnerabilityPattern[]

  /**
   * @param customPatterns Rules to use instead of the built-in set. The array
   *   is copied, so later {@link addPattern} calls never modify the caller's
   *   array or the built-in defaults shared by other scanner instances.
   */
  constructor(customPatterns?: VulnerabilityPattern[]) {
    this.patterns = [...(customPatterns ?? VULNERABILITY_PATTERNS)]
  }

  /**
   * Scan a single file's content for security issues.
   *
   * @param filePath Path recorded on each finding; the file is not read.
   * @param content Full text of the file; LF and CRLF line endings are accepted.
   * @param language Language of the file. When omitted, language-restricted
   *   patterns are applied as well.
   * @returns Findings grouped by pattern, then by ascending line number.
   */
  scanFile(filePath: string, content: string, language?: string): SecurityFinding[] {
    const findings: SecurityFinding[] = []
    // Split on CRLF too, so no stray '\r' is left for the patterns to trip over.
    const lines = content.split(/\r?\n/)

    for (const pattern of this.patterns) {
      // Skip if language filter doesn't match
      if (pattern.languages && language && !pattern.languages.includes(language)) {
        continue
      }

      for (const [index, line] of lines.entries()) {
        // search() ignores the `g` flag and lastIndex, so a custom global regex
        // still yields a column and leaves no state behind between lines.
        const column = line.search(pattern.regex)
        if (column === -1) continue

        const lineNumber = index + 1
        findings.push({
          id: `${pattern.id}-${filePath}:${lineNumber}`,
          severity: pattern.severity,
          category: pattern.category,
          title: pattern.title,
          description: pattern.description,
          file: filePath,
          line: lineNumber,
          column,
          code: line.trim(),
          suggestion: pattern.suggestion,
          cwe: pattern.cwe,
        })
      }
    }

    return findings
  }

  /**
   * Scan multiple files.
   *
   * @param files Entries to scan; each is passed to {@link scanFile}.
   * @returns A report whose findings are sorted from critical down to info.
   *   Findings of equal severity keep their scan order.
   */
  scanFiles(
    files: Array<{ path: string; content: string; language?: string }>
  ): SecurityReport {
    const startTime = Date.now()
    const allFindings: SecurityFinding[] = []
    const summary = { total: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 }

    for (const file of files) {
      // Push one by one: spreading a very large array into push() can exceed
      // the engine's argument limit.
      for (const finding of this.scanFile(file.path, file.content, file.language)) {
        allFindings.push(finding)
        summary[finding.severity] += 1
      }
    }
    summary.total = allFindings.length

    allFindings.sort((a, b) => SEVERITY_RANK[a.severity] - SEVERITY_RANK[b.severity])

    return {
      findings: allFindings,
      summary,
      scannedFiles: files.length,
      scanDuration: Date.now() - startTime,
    }
  }

  /**
   * Add a custom vulnerability pattern to this scanner instance only.
   */
  addPattern(pattern: VulnerabilityPattern): void {
    this.patterns.push(pattern)
  }

  /**
   * Get all available patterns.
   *
   * @returns A shallow copy; editing the returned array does not affect the scanner.
   */
  getPatterns(): VulnerabilityPattern[] {
    return [...this.patterns]
  }
}
package/src/utils/fs.ts CHANGED
@@ -276,7 +276,7 @@ function inferContextFileType(filePath: string): ContextFileType {
276
276
  }
277
277
 
278
278
  /** Recognised project language */
279
- export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown'
279
+ export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown' | 'polyglot'
280
280
 
281
281
  /** Auto-detect the project's primary language from manifest files */
282
282
  export async function detectProjectLanguage(projectRoot: string): Promise<ProjectLanguage> {
@@ -310,24 +310,28 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
310
310
  ]
311
311
 
312
312
  const toPatterns = (lang: ProjectLanguage): string[] => {
313
- return getDiscoveryExtensions(lang).map(ext => `**/*${ext}`)
313
+ if (lang === 'polyglot') {
314
+ // For polyglot, use LANGUAGE_EXTENSIONS.polyglot directly
315
+ return getDiscoveryExtensions('polyglot' as any).map(ext => `**/*${ext}`)
316
+ }
317
+ return getDiscoveryExtensions(lang as any).map(ext => `**/*${ext}`)
314
318
  }
315
319
 
316
320
  switch (language) {
317
321
  case 'typescript':
318
322
  return {
319
323
  patterns: toPatterns(language),
320
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
324
+ ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
321
325
  }
322
326
  case 'javascript':
323
327
  return {
324
328
  patterns: toPatterns(language),
325
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
329
+ ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
326
330
  }
327
331
  case 'python':
328
332
  return {
329
333
  patterns: toPatterns(language),
330
- ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py'],
334
+ ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py', '**/lib/site-packages/**'],
331
335
  }
332
336
  case 'go':
333
337
  return {
@@ -352,34 +356,47 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
352
356
  case 'ruby':
353
357
  return {
354
358
  patterns: toPatterns(language),
355
- ignore: [...commonIgnore, '**/vendor/**', '**/*_spec.rb', '**/spec/**'],
359
+ ignore: [...commonIgnore, '**/vendor/**', '**/*.gemspec'],
356
360
  }
357
361
  case 'php':
358
362
  return {
359
363
  patterns: toPatterns(language),
360
- ignore: [...commonIgnore, '**/vendor/**', '**/*Test.php'],
364
+ ignore: [...commonIgnore, '**/vendor/**', '**/tests/**', '**/Test*.php'],
361
365
  }
362
366
  case 'csharp':
363
367
  return {
364
368
  patterns: toPatterns(language),
365
- ignore: [...commonIgnore, '**/bin/**', '**/obj/**'],
369
+ ignore: [...commonIgnore, '**/bin/**', '**/obj/**', '**/*Test.cs'],
366
370
  }
367
- case 'cpp':
371
+ case 'c':
368
372
  return {
369
373
  patterns: toPatterns(language),
370
- ignore: [...commonIgnore, '**/build/**', '**/cmake-build-*/**'],
374
+ ignore: [...commonIgnore, '**/*.h'],
371
375
  }
372
- case 'c':
376
+ case 'cpp':
373
377
  return {
374
378
  patterns: toPatterns(language),
375
- ignore: [...commonIgnore, '**/build/**'],
379
+ ignore: [...commonIgnore, '**/build/**', '**/*.hpp'],
376
380
  }
377
- default:
378
- // Fallback: discover JS/TS (most common)
381
+ case 'polyglot':
379
382
  return {
380
383
  patterns: toPatterns(language),
381
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/*.d.ts'],
384
+ ignore: [
385
+ ...commonIgnore,
386
+ '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**',
387
+ '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/lib/site-packages/**',
388
+ '**/vendor/**', '**/target/**', '**/.gradle/**', '**/.build/**', '**/bin/**', '**/obj/**',
389
+ '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}',
390
+ '**/test_*.py', '**/*_test.py', '**/Test*.java', '**/*Test.java', '**/*Test.cs',
391
+ ],
382
392
  }
393
+ case 'unknown':
394
+ return {
395
+ patterns: ['**/*.{ts,tsx,js,jsx}'],
396
+ ignore: [...commonIgnore, '**/node_modules/**'],
397
+ }
398
+ default:
399
+ return { patterns: [], ignore: commonIgnore }
383
400
  }
384
401
  }
385
402
 
@@ -625,6 +642,20 @@ const LANGUAGE_IGNORE_TEMPLATES: Record<ProjectLanguage, string[]> = {
625
642
  '__tests__/',
626
643
  '',
627
644
  ],
645
+ polyglot: [
646
+ '# Multi-language project',
647
+ '**/node_modules/**',
648
+ '**/venv/**',
649
+ '**/.venv/**',
650
+ '**/__pycache__/**',
651
+ '**/site-packages/**',
652
+ '**/vendor/**',
653
+ '**/target/**',
654
+ '**/build/**',
655
+ '**/dist/**',
656
+ '**/.next/**',
657
+ '',
658
+ ],
628
659
  }
629
660
 
630
661
  /**
@@ -14,6 +14,7 @@ export type RegistryLanguage =
14
14
  | 'csharp'
15
15
  | 'c'
16
16
  | 'cpp'
17
+ | 'polyglot'
17
18
  | 'unknown'
18
19
 
19
20
  const OXC_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'] as const
@@ -44,6 +45,18 @@ const LANGUAGE_EXTENSIONS: Record<RegistryLanguage, readonly string[]> = {
44
45
  csharp: ['.cs'],
45
46
  c: ['.c', '.h'],
46
47
  cpp: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh', '.h'],
48
+ polyglot: [
49
+ '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
50
+ '.py',
51
+ '.go',
52
+ '.rs',
53
+ '.java', '.kt', '.kts',
54
+ '.swift',
55
+ '.rb',
56
+ '.php',
57
+ '.cs',
58
+ '.c', '.h', '.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh',
59
+ ],
47
60
  unknown: ['.ts', '.tsx', '.js', '.jsx'],
48
61
  }
49
62
 
@@ -4,7 +4,7 @@
4
4
  * Rules:
5
5
  * - Pattern with no glob chars (*, ?, {, [) → directory prefix match
6
6
  * "src/auth" matches "src/auth/jwt.ts" and "src/auth" itself
7
- * - "**" matches any depth
7
+ * - "**" matches any depth (zero or more directory segments)
8
8
  * - "*" matches within a single directory segment
9
9
  */
10
10
  export function minimatch(filePath: string, pattern: string): boolean {
@@ -17,12 +17,55 @@ export function minimatch(filePath: string, pattern: string): boolean {
17
17
  return normalizedPath === bare || normalizedPath.startsWith(bare + '/')
18
18
  }
19
19
 
20
- // Convert glob to regex
21
- const regexStr = normalizedPattern
20
+ // Handle patterns that start with ** - these should match anywhere in the path
21
+ // e.g., **/venv/** should match if there's a /venv/ segment anywhere
22
+ if (normalizedPattern.startsWith('**/')) {
23
+ const rest = normalizedPattern.slice(3) // Remove **/
24
+ // Check if the rest of the pattern appears as a path segment
25
+ // For **/venv/**, check if /venv/ is in the path
26
+ // For **/node_modules/**, check if /node_modules/ is in the path
27
+ const segments = normalizedPath.split('/')
28
+ const patternSegments = rest.split('/').filter(Boolean)
29
+
30
+ // Check if pattern segments appear consecutively in path
31
+ for (let i = 0; i <= segments.length - patternSegments.length; i++) {
32
+ let match = true
33
+ for (let j = 0; j < patternSegments.length; j++) {
34
+ const pseg = patternSegments[j].replace(/\*/g, '[^/]*')
35
+ if (!new RegExp('^' + pseg + '$', 'i').test(segments[i + j])) {
36
+ match = false
37
+ break
38
+ }
39
+ }
40
+ if (match) return true
41
+ }
42
+ return false
43
+ }
44
+
45
+ // Convert glob to regex (for patterns not starting with **)
46
+ let regexStr = normalizedPattern
47
+ .replace(/\[/g, '\\[')
48
+ .replace(/\]/g, '\\]')
22
49
  .replace(/\./g, '\\.')
23
- .replace(/\*\*\//g, '(?:.+/)?')
24
- .replace(/\*\*/g, '.*')
25
- .replace(/\*/g, '[^/]*')
50
+
51
+ // Replace **/ at end with (?:[^/]+/)* - matches zero or more dir segments ending with /
52
+ // But we need to handle path/** specifically - matching path/file, path/dir/file, etc.
53
+
54
+ // Handle trailing /** specifically - should match path itself and anything under it
55
+ if (normalizedPattern.endsWith('/**')) {
56
+ const base = normalizedPattern.slice(0, -3) // Remove /**
57
+ // Match either exact base or base + anything
58
+ return normalizedPath === base || normalizedPath.startsWith(base + '/')
59
+ }
60
+
61
+ // Replace **/ with (?:[^/]+/)* - matches zero or more directory segments
62
+ regexStr = regexStr.replace(/\*\*\//g, '(?:[^/]+/)*')
63
+ // Replace trailing ** with (?:[^/]+/)*[^/]+ - matches zero or more at end
64
+ regexStr = regexStr.replace(/\*\*$/g, '(?:[^/]+/)*[^/]+')
65
+ // Standalone **
66
+ regexStr = regexStr.replace(/\*\*/g, '(?:[^/]+/)*[^/]+')
67
+ // Single * matches any characters except slash
68
+ regexStr = regexStr.replace(/\*/g, '[^/]*')
26
69
 
27
70
  return new RegExp(`^${regexStr}$`, 'i').test(normalizedPath)
28
71
  }