@getmikk/core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +4 -4
  2. package/package.json +2 -1
  3. package/src/analysis/index.ts +9 -0
  4. package/src/analysis/taint-analysis.ts +419 -0
  5. package/src/analysis/type-flow.ts +247 -0
  6. package/src/cache/incremental-cache.ts +278 -0
  7. package/src/cache/index.ts +1 -0
  8. package/src/contract/contract-generator.ts +31 -3
  9. package/src/contract/contract-reader.ts +1 -0
  10. package/src/contract/lock-compiler.ts +125 -12
  11. package/src/contract/schema.ts +4 -0
  12. package/src/error-handler.ts +2 -1
  13. package/src/graph/cluster-detector.ts +2 -4
  14. package/src/graph/dead-code-detector.ts +303 -117
  15. package/src/graph/graph-builder.ts +21 -161
  16. package/src/graph/impact-analyzer.ts +1 -0
  17. package/src/graph/index.ts +2 -0
  18. package/src/graph/rich-function-index.ts +1080 -0
  19. package/src/graph/symbol-table.ts +252 -0
  20. package/src/hash/hash-store.ts +1 -0
  21. package/src/index.ts +4 -0
  22. package/src/parser/base-extractor.ts +19 -0
  23. package/src/parser/boundary-checker.ts +31 -12
  24. package/src/parser/error-recovery.ts +647 -0
  25. package/src/parser/function-body-extractor.ts +248 -0
  26. package/src/parser/go/go-extractor.ts +249 -676
  27. package/src/parser/index.ts +138 -295
  28. package/src/parser/language-registry.ts +57 -0
  29. package/src/parser/oxc-parser.ts +166 -28
  30. package/src/parser/oxc-resolver.ts +179 -11
  31. package/src/parser/parser-constants.ts +1 -0
  32. package/src/parser/rust/rust-extractor.ts +109 -0
  33. package/src/parser/tree-sitter/parser.ts +400 -66
  34. package/src/parser/tree-sitter/queries.ts +106 -10
  35. package/src/parser/types.ts +20 -1
  36. package/src/search/bm25.ts +21 -8
  37. package/src/search/direct-search.ts +472 -0
  38. package/src/search/embedding-provider.ts +249 -0
  39. package/src/search/index.ts +12 -0
  40. package/src/search/semantic-search.ts +435 -0
  41. package/src/security/index.ts +1 -0
  42. package/src/security/scanner.ts +342 -0
  43. package/src/utils/artifact-transaction.ts +1 -0
  44. package/src/utils/atomic-write.ts +1 -0
  45. package/src/utils/errors.ts +89 -4
  46. package/src/utils/fs.ts +150 -65
  47. package/src/utils/json.ts +1 -0
  48. package/src/utils/language-registry.ts +96 -5
  49. package/src/utils/minimatch.ts +49 -6
  50. package/src/utils/path.ts +26 -0
  51. package/tests/dead-code.test.ts +3 -2
  52. package/tests/direct-search.test.ts +435 -0
  53. package/tests/error-recovery.test.ts +143 -0
  54. package/tests/fixtures/simple-api/src/index.ts +1 -1
  55. package/tests/go-parser.test.ts +19 -335
  56. package/tests/js-parser.test.ts +18 -1089
  57. package/tests/language-registry-all.test.ts +276 -0
  58. package/tests/language-registry.test.ts +6 -4
  59. package/tests/parse-diagnostics.test.ts +9 -96
  60. package/tests/parser.test.ts +42 -771
  61. package/tests/polyglot-parser.test.ts +117 -0
  62. package/tests/rich-function-index.test.ts +703 -0
  63. package/tests/tree-sitter-parser.test.ts +108 -80
  64. package/tests/ts-parser.test.ts +8 -8
  65. package/tests/verification.test.ts +175 -0
  66. package/src/parser/base-parser.ts +0 -16
  67. package/src/parser/go/go-parser.ts +0 -43
  68. package/src/parser/javascript/js-extractor.ts +0 -278
  69. package/src/parser/javascript/js-parser.ts +0 -101
  70. package/src/parser/typescript/ts-extractor.ts +0 -447
  71. package/src/parser/typescript/ts-parser.ts +0 -36
package/src/utils/fs.ts CHANGED
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as fs from 'node:fs/promises'
2
3
  import * as path from 'node:path'
3
4
  import fg from 'fast-glob'
@@ -184,7 +185,22 @@ export function parseMikkIgnore(content: string): string[] {
184
185
  * This is technology-agnostic: it works for Prisma, Drizzle, GraphQL, SQL,
185
186
  * Protobuf, Docker, OpenAPI, and more -- anything with a well-known file pattern.
186
187
  */
187
- export async function discoverContextFiles(projectRoot: string): Promise<ContextFile[]> {
188
+
189
+ export interface DiscoverContextFilesOptions {
190
+ /** Maximum number of context files to return (default 20) */
191
+ maxFiles?: number
192
+ /** Callback for progress updates */
193
+ onProgress?: (current: number, total: number, file: string) => void
194
+ /** Skip reading file content - just get file list with stats */
195
+ metadataOnly?: boolean
196
+ }
197
+
198
+ export async function discoverContextFiles(
199
+ projectRoot: string,
200
+ options: DiscoverContextFilesOptions = {}
201
+ ): Promise<ContextFile[]> {
202
+ const { maxFiles = 20, onProgress, metadataOnly = false } = options
203
+
188
204
  const mikkIgnore = await readMikkIgnore(projectRoot)
189
205
  const files = await fg(CONTEXT_FILE_PATTERNS, {
190
206
  cwd: projectRoot,
@@ -194,29 +210,49 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
194
210
  })
195
211
 
196
212
  const normalised = files.map(f => f.replace(/\\/g, '/'))
197
-
198
- // Deduplicate -- some patterns overlap (e.g. models/*.ts also matched by source discovery)
199
213
  const unique = [...new Set(normalised)]
200
214
 
201
215
  const results: ContextFile[] = []
202
-
203
- for (const relPath of unique) {
204
- const absPath = path.join(projectRoot, relPath)
205
- try {
206
- const stat = await fs.stat(absPath)
207
- if (stat.size > MAX_CONTEXT_FILE_SIZE) continue // skip huge files
208
- if (stat.size === 0) continue
209
-
210
- const content = await fs.readFile(absPath, 'utf-8')
211
- const type = inferContextFileType(relPath)
212
-
213
- results.push({ path: relPath, content, type, size: stat.size })
214
- } catch {
215
- // File unreadable -- skip
216
+ const batchSize = 10
217
+
218
+ for (let i = 0; i < unique.length; i += batchSize) {
219
+ const batch = unique.slice(i, i + batchSize)
220
+
221
+ const batchResults = await Promise.all(
222
+ batch.map(async (relPath) => {
223
+ const absPath = path.join(projectRoot, relPath)
224
+ try {
225
+ const stat = await fs.stat(absPath)
226
+ if (stat.size > MAX_CONTEXT_FILE_SIZE) return null
227
+ if (stat.size === 0) return null
228
+
229
+ const type = inferContextFileType(relPath)
230
+
231
+ if (onProgress) {
232
+ onProgress(results.length + 1, Math.min(unique.length, maxFiles), relPath)
233
+ }
234
+
235
+ if (metadataOnly) {
236
+ return { path: relPath, content: '', type, size: stat.size }
237
+ }
238
+
239
+ const content = await fs.readFile(absPath, 'utf-8')
240
+ return { path: relPath, content, type, size: stat.size }
241
+ } catch {
242
+ return null
243
+ }
244
+ })
245
+ )
246
+
247
+ for (const result of batchResults) {
248
+ if (result && results.length < maxFiles) {
249
+ results.push(result)
250
+ }
216
251
  }
252
+
253
+ if (results.length >= maxFiles) break
217
254
  }
218
255
 
219
- // Sort: schemas/models first, then types, routes, config
220
256
  const priority: Record<ContextFileType, number> = {
221
257
  schema: 0,
222
258
  model: 1,
@@ -229,9 +265,6 @@ export async function discoverContextFiles(projectRoot: string): Promise<Context
229
265
  }
230
266
  results.sort((a, b) => priority[a.type] - priority[b.type])
231
267
 
232
- // If we have a schema file (e.g. prisma/schema.prisma), the migrations
233
- // are redundant -- they represent historical deltas, not the current state.
234
- // Including them wastes AI tokens and can be actively misleading.
235
268
  const hasSchema = results.some(f => f.type === 'schema')
236
269
  if (hasSchema) {
237
270
  return results.filter(f => f.type !== 'migration')
@@ -276,7 +309,7 @@ function inferContextFileType(filePath: string): ContextFileType {
276
309
  }
277
310
 
278
311
  /** Recognised project language */
279
- export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown'
312
+ export type ProjectLanguage = 'typescript' | 'javascript' | 'python' | 'go' | 'rust' | 'java' | 'swift' | 'ruby' | 'php' | 'csharp' | 'c' | 'cpp' | 'unknown' | 'polyglot'
280
313
 
281
314
  /** Auto-detect the project's primary language from manifest files */
282
315
  export async function detectProjectLanguage(projectRoot: string): Promise<ProjectLanguage> {
@@ -287,19 +320,52 @@ export async function detectProjectLanguage(projectRoot: string): Promise<Projec
287
320
  const matches = await fg(pattern, { cwd: projectRoot, onlyFiles: true, deep: 1 })
288
321
  return matches.length > 0
289
322
  }
323
+
324
+ const hasTsConfig = await exists('tsconfig.json') || await hasGlob('tsconfig.*.json')
325
+ const hasPackageJson = await exists('package.json')
326
+ const hasRust = await exists('Cargo.toml')
327
+ const hasGo = await exists('go.mod')
328
+ const hasPython = await exists('pyproject.toml') || await exists('setup.py') || await exists('requirements.txt')
329
+ const hasRuby = await exists('Gemfile')
330
+ const hasJava = await exists('pom.xml') || await exists('build.gradle') || await exists('build.gradle.kts')
331
+ const hasSwift = await exists('Package.swift')
332
+ const hasPhp = await exists('composer.json')
333
+ const hasCSharp = await hasGlob('*.csproj') || await hasGlob('*.sln')
334
+ const hasCpp = await hasGlob('CMakeLists.txt') || await hasGlob('**/*.cmake')
335
+ const hasC = await hasGlob('*.c') || await hasGlob('*.h')
336
+
337
+ // Count detected language families (TypeScript and JavaScript share package.json, so they count as a single JS family)
338
+ let languageFamilyCount = 0
339
+ if (hasTsConfig || hasPackageJson) languageFamilyCount++ // JS family (TS or JS)
340
+ if (hasRust) languageFamilyCount++
341
+ if (hasGo) languageFamilyCount++
342
+ if (hasPython) languageFamilyCount++
343
+ if (hasRuby) languageFamilyCount++
344
+ if (hasJava) languageFamilyCount++
345
+ if (hasSwift) languageFamilyCount++
346
+ if (hasPhp) languageFamilyCount++
347
+ if (hasCSharp) languageFamilyCount++
348
+ if (hasCpp) languageFamilyCount++
349
+ if (hasC) languageFamilyCount++
350
+
351
+ // If multiple language families detected, it's polyglot
352
+ if (languageFamilyCount > 1) {
353
+ return 'polyglot'
354
+ }
355
+
290
356
  // Check in priority order -- most specific first
291
- if (await exists('tsconfig.json') || await hasGlob('tsconfig.*.json')) return 'typescript'
292
- if (await exists('Cargo.toml')) return 'rust'
293
- if (await exists('go.mod')) return 'go'
294
- if (await exists('pyproject.toml') || await exists('setup.py') || await exists('requirements.txt')) return 'python'
295
- if (await exists('Gemfile')) return 'ruby'
296
- if (await exists('pom.xml') || await exists('build.gradle') || await exists('build.gradle.kts')) return 'java'
297
- if (await exists('Package.swift')) return 'swift'
298
- if (await exists('composer.json')) return 'php'
299
- if (await hasGlob('*.csproj') || await hasGlob('*.sln')) return 'csharp'
300
- if (await hasGlob('CMakeLists.txt') || await hasGlob('**/*.cmake') || await hasGlob('*.cpp')) return 'cpp'
301
- if (await hasGlob('*.c') || await hasGlob('*.h')) return 'c'
302
- if (await exists('package.json')) return 'javascript'
357
+ if (hasTsConfig) return 'typescript'
358
+ if (hasRust) return 'rust'
359
+ if (hasGo) return 'go'
360
+ if (hasPython) return 'python'
361
+ if (hasRuby) return 'ruby'
362
+ if (hasJava) return 'java'
363
+ if (hasSwift) return 'swift'
364
+ if (hasPhp) return 'php'
365
+ if (hasCSharp) return 'csharp'
366
+ if (hasCpp) return 'cpp'
367
+ if (hasC) return 'c'
368
+ if (hasPackageJson) return 'javascript'
303
369
  return 'unknown'
304
370
  }
305
371
 
@@ -310,24 +376,28 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
310
376
  ]
311
377
 
312
378
  const toPatterns = (lang: ProjectLanguage): string[] => {
313
- return getDiscoveryExtensions(lang).map(ext => `**/*${ext}`)
379
+ if (lang === 'polyglot') {
380
+ // For polyglot, use LANGUAGE_EXTENSIONS.polyglot directly
381
+ return getDiscoveryExtensions('polyglot' as any).map(ext => `**/*${ext}`)
382
+ }
383
+ return getDiscoveryExtensions(lang as any).map(ext => `**/*${ext}`)
314
384
  }
315
385
 
316
386
  switch (language) {
317
387
  case 'typescript':
318
388
  return {
319
- patterns: toPatterns(language),
320
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
389
+ patterns: [...toPatterns(language), '**/*.js', '**/*.jsx'],
390
+ ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
321
391
  }
322
392
  case 'javascript':
323
393
  return {
324
- patterns: toPatterns(language),
325
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}'],
394
+ patterns: [...toPatterns(language), '**/*.ts', '**/*.tsx'],
395
+ ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/.next/**', '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}', '**/venv/**', '**/.venv/**'],
326
396
  }
327
397
  case 'python':
328
398
  return {
329
399
  patterns: toPatterns(language),
330
- ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py'],
400
+ ignore: [...commonIgnore, '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/test_*.py', '**/*_test.py', '**/lib/site-packages/**'],
331
401
  }
332
402
  case 'go':
333
403
  return {
@@ -341,7 +411,7 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
341
411
  }
342
412
  case 'java':
343
413
  return {
344
- patterns: toPatterns(language),
414
+ patterns: [...toPatterns(language), '**/*.kt', '**/*.kts'],
345
415
  ignore: [...commonIgnore, '**/target/**', '**/.gradle/**', '**/Test*.java', '**/*Test.java'],
346
416
  }
347
417
  case 'swift':
@@ -352,34 +422,47 @@ export function getDiscoveryPatterns(language: ProjectLanguage): { patterns: str
352
422
  case 'ruby':
353
423
  return {
354
424
  patterns: toPatterns(language),
355
- ignore: [...commonIgnore, '**/vendor/**', '**/*_spec.rb', '**/spec/**'],
425
+ ignore: [...commonIgnore, '**/vendor/**', '**/*.gemspec'],
356
426
  }
357
427
  case 'php':
358
428
  return {
359
429
  patterns: toPatterns(language),
360
- ignore: [...commonIgnore, '**/vendor/**', '**/*Test.php'],
430
+ ignore: [...commonIgnore, '**/vendor/**', '**/tests/**', '**/Test*.php'],
361
431
  }
362
432
  case 'csharp':
363
433
  return {
364
434
  patterns: toPatterns(language),
365
- ignore: [...commonIgnore, '**/bin/**', '**/obj/**'],
435
+ ignore: [...commonIgnore, '**/bin/**', '**/obj/**', '**/*Test.cs'],
366
436
  }
367
- case 'cpp':
437
+ case 'c':
368
438
  return {
369
439
  patterns: toPatterns(language),
370
- ignore: [...commonIgnore, '**/build/**', '**/cmake-build-*/**'],
440
+ ignore: [...commonIgnore, '**/*.h'],
371
441
  }
372
- case 'c':
442
+ case 'cpp':
373
443
  return {
374
444
  patterns: toPatterns(language),
375
- ignore: [...commonIgnore, '**/build/**'],
445
+ ignore: [...commonIgnore, '**/build/**', '**/*.hpp'],
376
446
  }
377
- default:
378
- // Fallback: discover JS/TS (most common)
447
+ case 'polyglot':
379
448
  return {
380
449
  patterns: toPatterns(language),
381
- ignore: [...commonIgnore, '**/node_modules/**', '**/dist/**', '**/*.d.ts'],
450
+ ignore: [
451
+ ...commonIgnore,
452
+ '**/node_modules/**', '**/dist/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**',
453
+ '**/__pycache__/**', '**/venv/**', '**/.venv/**', '**/.tox/**', '**/lib/site-packages/**',
454
+ '**/vendor/**', '**/target/**', '**/.gradle/**', '**/.build/**', '**/bin/**', '**/obj/**',
455
+ '**/*.d.ts', '**/*.test.{ts,js,tsx,jsx}', '**/*.spec.{ts,js,tsx,jsx}',
456
+ '**/test_*.py', '**/*_test.py', '**/Test*.java', '**/*Test.java', '**/*Test.cs',
457
+ ],
458
+ }
459
+ case 'unknown':
460
+ return {
461
+ patterns: ['**/*.{ts,tsx,js,jsx}'],
462
+ ignore: [...commonIgnore, '**/node_modules/**'],
382
463
  }
464
+ default:
465
+ return { patterns: [], ignore: commonIgnore }
383
466
  }
384
467
  }
385
468
 
@@ -431,29 +514,17 @@ export async function fileExists(filePath: string): Promise<boolean> {
431
514
 
432
515
  /**
433
516
  * Set up the .mikk directory structure in a project root.
517
+ * Only creates directories that are actually used.
434
518
  */
435
519
  export async function setupMikkDirectory(projectRoot: string): Promise<void> {
436
520
  const dirs = [
437
521
  '.mikk',
438
- '.mikk/fragments',
439
- '.mikk/diagrams',
440
- '.mikk/diagrams/modules',
441
- '.mikk/diagrams/capsules',
442
- '.mikk/diagrams/flows',
443
- '.mikk/diagrams/impact',
444
- '.mikk/diagrams/exposure',
445
- '.mikk/intent',
446
522
  '.mikk/cache',
523
+ '.mikk/transactions',
447
524
  ]
448
525
  for (const dir of dirs) {
449
526
  await fs.mkdir(path.join(projectRoot, dir), { recursive: true })
450
527
  }
451
-
452
- // Create .gitkeep in impact dir
453
- const impactKeep = path.join(projectRoot, '.mikk/diagrams/impact/.gitkeep')
454
- if (!await fileExists(impactKeep)) {
455
- await fs.writeFile(impactKeep, '', 'utf-8')
456
- }
457
528
  }
458
529
 
459
530
  // --- .mikkignore auto-generation --------------------------------------------
@@ -625,6 +696,20 @@ const LANGUAGE_IGNORE_TEMPLATES: Record<ProjectLanguage, string[]> = {
625
696
  '__tests__/',
626
697
  '',
627
698
  ],
699
+ polyglot: [
700
+ '# Multi-language project',
701
+ '**/node_modules/**',
702
+ '**/venv/**',
703
+ '**/.venv/**',
704
+ '**/__pycache__/**',
705
+ '**/site-packages/**',
706
+ '**/vendor/**',
707
+ '**/target/**',
708
+ '**/build/**',
709
+ '**/dist/**',
710
+ '**/.next/**',
711
+ '',
712
+ ],
628
713
  }
629
714
 
630
715
  /**
package/src/utils/json.ts CHANGED
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as fs from 'node:fs/promises'
2
3
 
3
4
  /**
package/src/utils/language-registry.ts CHANGED
@@ -14,14 +14,38 @@ export type RegistryLanguage =
14
14
  | 'csharp'
15
15
  | 'c'
16
16
  | 'cpp'
17
+ | 'zig'
18
+ | 'elixir'
19
+ | 'haskell'
20
+ | 'scala'
21
+ | 'dart'
22
+ | 'lua'
23
+ | 'julia'
24
+ | 'clojure'
25
+ | 'fsharp'
26
+ | 'ocaml'
27
+ | 'perl'
28
+ | 'r'
29
+ | 'sql'
30
+ | 'terraform'
31
+ | 'shell'
32
+ | 'vue'
33
+ | 'svelte'
34
+ | 'jsx'
35
+ | 'tsx'
36
+ | 'polyglot'
17
37
  | 'unknown'
18
38
 
19
- const OXC_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'] as const
39
+ const OXC_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.vue', '.svelte'] as const
20
40
  const GO_EXTENSIONS = ['.go'] as const
21
41
  const TREE_SITTER_EXTENSIONS = [
22
42
  '.py', '.java', '.kt', '.kts', '.swift',
23
43
  '.c', '.h', '.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh',
24
44
  '.cs', '.rs', '.php', '.rb',
45
+ '.zig', '.ex', '.exs', '.hs', '.scala', '.sc',
46
+ '.dart', '.lua', '.jl', '.clj', '.cljs', '.fs', '.fsx',
47
+ '.ml', '.mli', '.pl', '.pm', '.r', '.R', '.sql',
48
+ '.tf', '.sh', '.bash', '.zsh',
25
49
  ] as const
26
50
 
27
51
  const PARSER_EXTENSIONS: Record<Exclude<ParserKind, 'unknown'>, readonly string[]> = {
@@ -31,19 +55,54 @@ const PARSER_EXTENSIONS: Record<Exclude<ParserKind, 'unknown'>, readonly string[
31
55
  }
32
56
 
33
57
  const LANGUAGE_EXTENSIONS: Record<RegistryLanguage, readonly string[]> = {
34
- typescript: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'],
35
- javascript: ['.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx'],
36
- python: ['.py'],
58
+ typescript: ['.ts', '.tsx', '.mts', '.cts', '.js', '.jsx', '.mjs', '.cjs'],
59
+ javascript: ['.js', '.jsx', '.mjs', '.cjs'],
60
+ vue: ['.vue'],
61
+ svelte: ['.svelte'],
62
+ jsx: ['.jsx'],
63
+ tsx: ['.tsx'],
64
+ python: ['.py', '.pyw'],
37
65
  go: ['.go'],
38
66
  rust: ['.rs'],
39
67
  kotlin: ['.kt', '.kts'],
40
- java: ['.java', '.kt', '.kts'],
68
+ java: ['.java'],
41
69
  swift: ['.swift'],
42
70
  ruby: ['.rb'],
43
71
  php: ['.php'],
44
72
  csharp: ['.cs'],
45
73
  c: ['.c', '.h'],
46
74
  cpp: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh', '.h'],
75
+ zig: ['.zig'],
76
+ elixir: ['.ex', '.exs'],
77
+ haskell: ['.hs'],
78
+ scala: ['.scala', '.sc'],
79
+ dart: ['.dart'],
80
+ lua: ['.lua'],
81
+ julia: ['.jl'],
82
+ clojure: ['.clj', '.cljs', '.cljc'],
83
+ fsharp: ['.fs', '.fsx', '.fsi'],
84
+ ocaml: ['.ml', '.mli'],
85
+ perl: ['.pl', '.pm'],
86
+ r: ['.r', '.R'],
87
+ sql: ['.sql'],
88
+ terraform: ['.tf'],
89
+ shell: ['.sh', '.bash', '.zsh'],
90
+ polyglot: [
91
+ '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
92
+ '.py', '.vue', '.svelte',
93
+ '.go',
94
+ '.rs',
95
+ '.java', '.kt', '.kts',
96
+ '.swift',
97
+ '.rb',
98
+ '.php',
99
+ '.cs',
100
+ '.c', '.h', '.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh',
101
+ '.zig', '.ex', '.exs', '.hs', '.scala', '.sc',
102
+ '.dart', '.lua', '.jl', '.clj', '.cljs', '.cljc',
103
+ '.fs', '.fsx', '.fsi', '.ml', '.mli', '.pl', '.pm',
104
+ '.r', '.R', '.sql', '.tf', '.sh', '.bash', '.zsh',
105
+ ],
47
106
  unknown: ['.ts', '.tsx', '.js', '.jsx'],
48
107
  }
49
108
 
@@ -69,6 +128,38 @@ export function languageForExtension(ext: string): RegistryLanguage {
69
128
  return EXT_TO_LANGUAGE.get(ext.toLowerCase()) ?? 'unknown'
70
129
  }
71
130
 
131
+ const VALID_PARSED_FILE_LANGUAGES = new Set([
132
+ // Mainstream Languages (22)
133
+ 'javascript', 'typescript', 'python', 'java', 'csharp', 'cpp', 'c',
134
+ 'php', 'ruby', 'swift', 'go', 'kotlin', 'rust', 'dart', 'scala',
135
+ 'haskell', 'elixir', 'clojure', 'fsharp', 'ocaml', 'perl', 'r',
136
+ // Systems Languages
137
+ 'zig',
138
+ // Scripting Languages
139
+ 'lua', 'julia',
140
+ // Special Purpose
141
+ 'sql', 'terraform', 'shell',
142
+ // Web Frameworks
143
+ 'vue', 'svelte',
144
+ // Fallback
145
+ 'unknown'
146
+ ])
147
+
148
+ export function toParsedFileLanguage(lang: RegistryLanguage): ParsedFileLanguage {
149
+ return VALID_PARSED_FILE_LANGUAGES.has(lang)
150
+ ? lang as ParsedFileLanguage
151
+ : 'unknown'
152
+ }
153
+
154
+ export type ParsedFileLanguage =
155
+ | 'javascript' | 'typescript' | 'python' | 'java' | 'csharp' | 'cpp' | 'c'
156
+ | 'php' | 'ruby' | 'swift' | 'go' | 'kotlin' | 'rust' | 'dart' | 'scala'
157
+ | 'haskell' | 'elixir' | 'clojure' | 'fsharp' | 'ocaml' | 'perl' | 'r'
158
+ | 'zig' | 'lua' | 'julia'
159
+ | 'sql' | 'terraform' | 'shell'
160
+ | 'vue' | 'svelte'
161
+ | 'unknown'
162
+
72
163
  export function getParserExtensions(kind: Exclude<ParserKind, 'unknown'>): readonly string[] {
73
164
  return PARSER_EXTENSIONS[kind]
74
165
  }
package/src/utils/minimatch.ts CHANGED
@@ -4,7 +4,7 @@
4
4
  * Rules:
5
5
  * - Pattern with no glob chars (*, ?, {, [) → directory prefix match
6
6
  * "src/auth" matches "src/auth/jwt.ts" and "src/auth" itself
7
- * - "**" matches any depth
7
+ * - "**" matches any depth (zero or more directory segments)
8
8
  * - "*" matches within a single directory segment
9
9
  */
10
10
  export function minimatch(filePath: string, pattern: string): boolean {
@@ -17,12 +17,55 @@ export function minimatch(filePath: string, pattern: string): boolean {
17
17
  return normalizedPath === bare || normalizedPath.startsWith(bare + '/')
18
18
  }
19
19
 
20
- // Convert glob to regex
21
- const regexStr = normalizedPattern
20
+ // Handle patterns that start with ** - these should match anywhere in the path
21
+ // e.g., **/venv/** should match if there's a /venv/ segment anywhere
22
+ if (normalizedPattern.startsWith('**/')) {
23
+ const rest = normalizedPattern.slice(3) // Remove **/
24
+ // Check if the rest of the pattern appears as a path segment
25
+ // For **/venv/**, check if /venv/ is in the path
26
+ // For **/node_modules/**, check if /node_modules/ is in the path
27
+ const segments = normalizedPath.split('/')
28
+ const patternSegments = rest.split('/').filter(Boolean)
29
+
30
+ // Check if pattern segments appear consecutively in path
31
+ for (let i = 0; i <= segments.length - patternSegments.length; i++) {
32
+ let match = true
33
+ for (let j = 0; j < patternSegments.length; j++) {
34
+ const pseg = patternSegments[j].replace(/\*/g, '[^/]*')
35
+ if (!new RegExp('^' + pseg + '$', 'i').test(segments[i + j])) {
36
+ match = false
37
+ break
38
+ }
39
+ }
40
+ if (match) return true
41
+ }
42
+ return false
43
+ }
44
+
45
+ // Convert glob to regex (for patterns not starting with **)
46
+ let regexStr = normalizedPattern
47
+ .replace(/\[/g, '\\[')
48
+ .replace(/\]/g, '\\]')
22
49
  .replace(/\./g, '\\.')
23
- .replace(/\*\*\//g, '(?:.+/)?')
24
- .replace(/\*\*/g, '.*')
25
- .replace(/\*/g, '[^/]*')
50
+
51
+ // Replace **/ at end with (?:[^/]+/)* - matches zero or more dir segments ending with /
52
+ // But we need to handle path/** specifically - matching path/file, path/dir/file, etc.
53
+
54
+ // Handle trailing /** specifically - should match path itself and anything under it
55
+ if (normalizedPattern.endsWith('/**')) {
56
+ const base = normalizedPattern.slice(0, -3) // Remove /**
57
+ // Match either exact base or base + anything
58
+ return normalizedPath === base || normalizedPath.startsWith(base + '/')
59
+ }
60
+
61
+ // Replace **/ with (?:[^/]+/)* - matches zero or more directory segments
62
+ regexStr = regexStr.replace(/\*\*\//g, '(?:[^/]+/)*')
63
+ // Replace trailing ** with (?:[^/]+/)*[^/]+ - matches one or more path segments at end
64
+ regexStr = regexStr.replace(/\*\*$/g, '(?:[^/]+/)*[^/]+')
65
+ // Standalone **
66
+ regexStr = regexStr.replace(/\*\*/g, '(?:[^/]+/)*[^/]+')
67
+ // Single * matches any characters except slash
68
+ regexStr = regexStr.replace(/\*/g, '[^/]*')
26
69
 
27
70
  return new RegExp(`^${regexStr}$`, 'i').test(normalizedPath)
28
71
  }
package/src/utils/path.ts ADDED
@@ -0,0 +1,26 @@
1
+ export function normalizeSlashes(filePath: string): string {
2
+ return filePath.replace(/\\/g, '/')
3
+ }
4
+
5
+ export function normalizePath(filePath: string, lowercase: boolean = true): string {
6
+ const normalized = normalizeSlashes(filePath)
7
+ return lowercase ? normalized.toLowerCase() : normalized
8
+ }
9
+
10
+ export function normalizePathQuiet(filePath: string): string {
11
+ return normalizeSlashes(filePath).toLowerCase()
12
+ }
13
+
14
+ export function getPathKey(filePath: string): string {
15
+ return normalizePath(filePath, true)
16
+ }
17
+
18
+ export function pathsEqual(a: string, b: string): boolean {
19
+ return normalizePathQuiet(a) === normalizePathQuiet(b)
20
+ }
21
+
22
+ export function isSubPath(child: string, parent: string): boolean {
23
+ const childNorm = normalizePathQuiet(child)
24
+ const parentNorm = normalizePathQuiet(parent)
25
+ return childNorm.startsWith(parentNorm + '/') || childNorm === parentNorm
26
+ }
package/tests/dead-code.test.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  import { describe, it, expect } from 'bun:test'
2
2
  import { DeadCodeDetector } from '../src/graph/dead-code-detector'
3
- import { buildTestGraph, mockFunction } from './helpers'
4
- import { GraphBuilder } from '../src/graph/graph-builder'
3
+ import { buildTestGraph } from './helpers'
5
4
  import type { MikkLock } from '../src/contract/schema'
6
5
 
6
+ const _GraphBuilder = { addNode: () => {}, addEdge: () => {}, build: () => new Map() }
7
+
7
8
  /** Helper to generate a dummy lock file from graph nodes for the detector */
8
9
  function generateDummyLock(graphNodes: Map<string, any>): MikkLock {
9
10
  const lock: MikkLock = {