@getmikk/core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +4 -4
  2. package/package.json +2 -1
  3. package/src/analysis/index.ts +9 -0
  4. package/src/analysis/taint-analysis.ts +419 -0
  5. package/src/analysis/type-flow.ts +247 -0
  6. package/src/cache/incremental-cache.ts +278 -0
  7. package/src/cache/index.ts +1 -0
  8. package/src/contract/contract-generator.ts +31 -3
  9. package/src/contract/contract-reader.ts +1 -0
  10. package/src/contract/lock-compiler.ts +125 -12
  11. package/src/contract/schema.ts +4 -0
  12. package/src/error-handler.ts +2 -1
  13. package/src/graph/cluster-detector.ts +2 -4
  14. package/src/graph/dead-code-detector.ts +303 -117
  15. package/src/graph/graph-builder.ts +21 -161
  16. package/src/graph/impact-analyzer.ts +1 -0
  17. package/src/graph/index.ts +2 -0
  18. package/src/graph/rich-function-index.ts +1080 -0
  19. package/src/graph/symbol-table.ts +252 -0
  20. package/src/hash/hash-store.ts +1 -0
  21. package/src/index.ts +4 -0
  22. package/src/parser/base-extractor.ts +19 -0
  23. package/src/parser/boundary-checker.ts +31 -12
  24. package/src/parser/error-recovery.ts +647 -0
  25. package/src/parser/function-body-extractor.ts +248 -0
  26. package/src/parser/go/go-extractor.ts +249 -676
  27. package/src/parser/index.ts +138 -295
  28. package/src/parser/language-registry.ts +57 -0
  29. package/src/parser/oxc-parser.ts +166 -28
  30. package/src/parser/oxc-resolver.ts +179 -11
  31. package/src/parser/parser-constants.ts +1 -0
  32. package/src/parser/rust/rust-extractor.ts +109 -0
  33. package/src/parser/tree-sitter/parser.ts +400 -66
  34. package/src/parser/tree-sitter/queries.ts +106 -10
  35. package/src/parser/types.ts +20 -1
  36. package/src/search/bm25.ts +21 -8
  37. package/src/search/direct-search.ts +472 -0
  38. package/src/search/embedding-provider.ts +249 -0
  39. package/src/search/index.ts +12 -0
  40. package/src/search/semantic-search.ts +435 -0
  41. package/src/security/index.ts +1 -0
  42. package/src/security/scanner.ts +342 -0
  43. package/src/utils/artifact-transaction.ts +1 -0
  44. package/src/utils/atomic-write.ts +1 -0
  45. package/src/utils/errors.ts +89 -4
  46. package/src/utils/fs.ts +150 -65
  47. package/src/utils/json.ts +1 -0
  48. package/src/utils/language-registry.ts +96 -5
  49. package/src/utils/minimatch.ts +49 -6
  50. package/src/utils/path.ts +26 -0
  51. package/tests/dead-code.test.ts +3 -2
  52. package/tests/direct-search.test.ts +435 -0
  53. package/tests/error-recovery.test.ts +143 -0
  54. package/tests/fixtures/simple-api/src/index.ts +1 -1
  55. package/tests/go-parser.test.ts +19 -335
  56. package/tests/js-parser.test.ts +18 -1089
  57. package/tests/language-registry-all.test.ts +276 -0
  58. package/tests/language-registry.test.ts +6 -4
  59. package/tests/parse-diagnostics.test.ts +9 -96
  60. package/tests/parser.test.ts +42 -771
  61. package/tests/polyglot-parser.test.ts +117 -0
  62. package/tests/rich-function-index.test.ts +703 -0
  63. package/tests/tree-sitter-parser.test.ts +108 -80
  64. package/tests/ts-parser.test.ts +8 -8
  65. package/tests/verification.test.ts +175 -0
  66. package/src/parser/base-parser.ts +0 -16
  67. package/src/parser/go/go-parser.ts +0 -43
  68. package/src/parser/javascript/js-extractor.ts +0 -278
  69. package/src/parser/javascript/js-parser.ts +0 -101
  70. package/src/parser/typescript/ts-extractor.ts +0 -447
  71. package/src/parser/typescript/ts-parser.ts +0 -36
@@ -1,8 +1,10 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as path from 'node:path'
2
3
  import { TreeSitterResolver } from './resolver.js'
4
+ import { LanguageRegistry } from '../language-registry.js'
3
5
  import { createRequire } from 'node:module'
4
6
  import { hashContent } from '../../hash/file-hasher.js'
5
- import { BaseParser } from '../base-parser.js'
7
+ import { BaseExtractor } from '../base-extractor.js'
6
8
  import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport, ParsedGeneric } from '../types.js'
7
9
  import * as Queries from './queries.js'
8
10
 
@@ -15,20 +17,106 @@ const _require = getRequire()
15
17
  let Parser: any = null
16
18
  let Language: any = null
17
19
  let initialized = false
18
- let initPromise: Promise<void> | null = null
19
-
20
- try {
21
- const ParserModule = _require('web-tree-sitter')
22
- Parser = ParserModule
23
- if (ParserModule.init) {
24
- initPromise = ParserModule.init().then(() => {
25
- Language = ParserModule.Language
26
- initialized = true
27
- }).catch(() => { /* ignore */ })
28
- } else if (ParserModule.default?.Language) {
29
- Language = ParserModule.default.Language
20
+ let initPromise: Promise<boolean> | null = null
21
+
22
+ function isValidTreeSitterModule(module: any): boolean {
23
+ if (!module) return false
24
+ if (module.HEAP8 || module.HEAP16 || module.HEAP32) return false
25
+ const hasInit = typeof module.init === 'function'
26
+ const hasLanguage = typeof module.Language !== 'undefined'
27
+ const hasDefault = module.default?.Language
28
+ const isFunctionWithInit = typeof module === 'function' && module.prototype?.init
29
+ const isFunctionWithLanguage = typeof module === 'function' && module.prototype?.Language
30
+ return hasInit || hasLanguage || !!hasDefault || isFunctionWithInit || isFunctionWithLanguage
31
+ }
32
+
33
+ async function ensureInitialized(): Promise<boolean> {
34
+ if (initialized && Language) return true
35
+
36
+ const attemptInit = async (): Promise<boolean> => {
37
+ try {
38
+ let ParserModule = _require('web-tree-sitter')
39
+ if (!ParserModule) return false
40
+
41
+ if (typeof ParserModule === 'function') {
42
+ if (ParserModule.prototype?.Language) {
43
+ Language = ParserModule.prototype.Language
44
+ Parser = ParserModule
45
+ initialized = true
46
+ return true
47
+ }
48
+ if (ParserModule.prototype?.init) {
49
+ await ParserModule.prototype.init()
50
+ Language = ParserModule.prototype.Language
51
+ Parser = ParserModule
52
+ initialized = !!Language
53
+ return initialized
54
+ }
55
+ }
56
+
57
+ if (!isValidTreeSitterModule(ParserModule)) {
58
+ ParserModule = ParserModule?.default || ParserModule
59
+ }
60
+
61
+ if (!isValidTreeSitterModule(ParserModule)) {
62
+ const moduleCache = _require.cache
63
+ const keys = Object.keys(moduleCache || {})
64
+ for (const key of keys) {
65
+ if (key.includes('web-tree-sitter')) {
66
+ delete moduleCache[key]
67
+ }
68
+ }
69
+ ParserModule = _require('web-tree-sitter')
70
+ if (typeof ParserModule === 'function' && ParserModule.prototype?.Language) {
71
+ Language = ParserModule.prototype.Language
72
+ Parser = ParserModule
73
+ initialized = true
74
+ return true
75
+ }
76
+ if (!isValidTreeSitterModule(ParserModule)) {
77
+ ParserModule = ParserModule?.default || ParserModule
78
+ }
79
+ }
80
+
81
+ if (!isValidTreeSitterModule(ParserModule)) {
82
+ return false
83
+ }
84
+
85
+ Parser = ParserModule
86
+
87
+ if (typeof ParserModule.init === 'function') {
88
+ await ParserModule.init()
89
+ Language = ParserModule.Language
90
+ initialized = !!Language
91
+ } else if (ParserModule.default?.Language) {
92
+ Language = ParserModule.default.Language
93
+ initialized = true
94
+ } else if (ParserModule.Language) {
95
+ Language = ParserModule.Language
96
+ initialized = true
97
+ }
98
+
99
+ return initialized && !!Language
100
+ } catch (err) {
101
+ console.warn('[tree-sitter] Initialization failed:', err)
102
+ return false
103
+ }
30
104
  }
31
- } catch { /* web-tree-sitter not installed */ }
105
+
106
+ if (initPromise) {
107
+ await initPromise.catch(() => {})
108
+ if (!initialized && Language) {
109
+ initPromise = attemptInit()
110
+ const retryResult = await initPromise
111
+ return retryResult
112
+ }
113
+ return initialized && !!Language
114
+ }
115
+
116
+ initPromise = attemptInit()
117
+ const result = await initPromise
118
+ return result
119
+ }
32
120
 
33
121
  function isExportedByLanguage(ext: string, name: string, nodeText: string): boolean {
34
122
  switch (ext) {
@@ -119,7 +207,35 @@ function findAllChildren(node: any, predicate: (n: any) => boolean): any[] {
119
207
  return results
120
208
  }
121
209
 
122
- function extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
210
+ function extractDecoratorsFromNode(defNode: any): string[] {
211
+ const decorators: string[] = []
212
+ if (!defNode?.children) return decorators
213
+
214
+ for (const child of defNode.children) {
215
+ if (child.type === 'decorator' || child.type === 'attribute' || child.type === 'annotation') {
216
+ const nameNode = findFirstChild(child, n =>
217
+ n.type === 'identifier' ||
218
+ n.type === 'attribute' ||
219
+ n.type === 'decorator_target'
220
+ )
221
+ if (nameNode?.text) {
222
+ decorators.push(nameNode.text)
223
+ }
224
+ }
225
+ if (child.type === 'expression_statement') {
226
+ const innerChild = findFirstChild(child, n => n.type === 'decorator' || n.type === 'attribute')
227
+ if (innerChild) {
228
+ const nameNode = findFirstChild(innerChild, n => n.type === 'identifier')
229
+ if (nameNode?.text) {
230
+ decorators.push(nameNode.text)
231
+ }
232
+ }
233
+ }
234
+ }
235
+ return decorators
236
+ }
237
+
238
+ function _extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
123
239
  const generics: ParsedGeneric[] = []
124
240
  if (!defNode) return generics
125
241
 
@@ -179,7 +295,7 @@ function assignCallsToFunctions(
179
295
  return unassigned
180
296
  }
181
297
 
182
- export class TreeSitterParser extends BaseParser {
298
+ export class TreeSitterParser extends BaseExtractor {
183
299
  private parser: any = null
184
300
  private languages = new Map<string, any>()
185
301
  private nameCounter = new Map<string, number>()
@@ -190,10 +306,9 @@ export class TreeSitterParser extends BaseParser {
190
306
  }
191
307
 
192
308
  private async init() {
193
- if (!this.parser) {
194
- if (!Parser || !initPromise) return
195
- await initPromise.catch(() => {})
196
- if (!Language) return
309
+ if (this.parser) return
310
+ const ready = await ensureInitialized()
311
+ if (ready && Parser) {
197
312
  this.parser = new Parser()
198
313
  }
199
314
  }
@@ -203,39 +318,57 @@ export class TreeSitterParser extends BaseParser {
203
318
  return Boolean(this.parser)
204
319
  }
205
320
 
206
- async parse(filePath: string, content: string): Promise<ParsedFile> {
207
- this.nameCounter.clear()
321
+ async extract(filePath: string, content: string): Promise<ParsedFile> {
208
322
  await this.init()
209
- const ext = path.extname(filePath).toLowerCase()
210
-
323
+
211
324
  if (!this.parser) {
212
- return this.buildEmptyFile(filePath, content, ext)
325
+ console.warn('[tree-sitter] Parser not initialized')
326
+ return this.buildEmptyFile(filePath, content, path.extname(filePath))
213
327
  }
214
328
 
329
+ const ext = path.extname(filePath).toLowerCase()
215
330
  const config = await this.getLanguageConfig(ext)
216
-
331
+
217
332
  if (!config || !config.lang) {
333
+ console.warn('[tree-sitter] Language not available for', ext)
218
334
  return this.buildEmptyFile(filePath, content, ext)
219
335
  }
220
336
 
221
- try {
222
- return this.parseWithConfig(filePath, content, ext, config)
223
- } catch (err) {
224
- console.warn(`Parse error for ${filePath}:`, err)
225
- return this.buildEmptyFile(filePath, content, ext)
226
- }
337
+ const result = await this.parseWithConfig(filePath, content, ext, config)
338
+
339
+ return result
227
340
  }
228
341
 
229
342
  private async parseWithConfig(filePath: string, content: string, ext: string, config: any): Promise<ParsedFile> {
230
343
  this.parser!.setLanguage(config.lang)
231
- const tree = this.parser!.parse(content)
232
- const query = config.lang.query(config.query)
233
344
 
345
+ let tree: any = null
346
+ try {
347
+ tree = this.parser!.parse(content)
348
+ } catch (parseErr) {
349
+ console.warn(`[tree-sitter] Parse failed for ${ext}:`, parseErr instanceof Error ? parseErr.message : String(parseErr))
350
+ return this.buildEmptyFile(filePath, content, ext)
351
+ }
352
+
353
+ let query: any = null
354
+ try {
355
+ query = config.lang.query(config.query)
356
+ } catch (queryErr) {
357
+ console.warn(`[tree-sitter] Query compilation failed for ${ext}:`, queryErr instanceof Error ? queryErr.message : String(queryErr))
358
+ return this.buildEmptyFile(filePath, content, ext)
359
+ }
360
+
234
361
  if (!query) {
235
362
  return this.buildEmptyFile(filePath, content, ext)
236
363
  }
237
364
 
238
- const matches = query.matches(tree.rootNode)
365
+ let matches: any[] = []
366
+ try {
367
+ matches = query.matches(tree.rootNode)
368
+ } catch (matchErr) {
369
+ console.warn(`[tree-sitter] Query execution failed for ${ext}:`, matchErr instanceof Error ? matchErr.message : String(matchErr))
370
+ return this.buildEmptyFile(filePath, content, ext)
371
+ }
239
372
 
240
373
  const functions: ParsedFunction[] = []
241
374
  const classesMap = new Map<string, ParsedClass>()
@@ -321,7 +454,6 @@ export class TreeSitterParser extends BaseParser {
321
454
 
322
455
  // --- Generic types ---
323
456
  if (captures['generic.name'] || captures['generic.arg']) {
324
- const genName = captures['generic.name']?.text || ''
325
457
  const genArg = captures['generic.arg']?.text || ''
326
458
  if (genArg && !generics.some(g => g.name === genArg)) {
327
459
  generics.push({
@@ -361,6 +493,7 @@ export class TreeSitterParser extends BaseParser {
361
493
 
362
494
  const returnType = extractReturnType(ext, defNode, nodeText)
363
495
  const params = extractParamsFromNode(defNode)
496
+ const decorators = extractDecoratorsFromNode(defNode)
364
497
 
365
498
  functions.push({
366
499
  id: fnId,
@@ -373,6 +506,7 @@ export class TreeSitterParser extends BaseParser {
373
506
  isExported: exported,
374
507
  isAsync,
375
508
  calls: [],
509
+ decorators: decorators.length > 0 ? decorators : undefined,
376
510
  hash: hashContent(nodeText),
377
511
  purpose: extractDocComment(content, startLine),
378
512
  edgeCasesHandled: [],
@@ -402,10 +536,6 @@ export class TreeSitterParser extends BaseParser {
402
536
  const clsId = `class:${filePath}:${clsName}`
403
537
 
404
538
  if (!classesMap.has(clsId)) {
405
- const isEnum = type === 'definition.enum'
406
- const isStruct = type === 'definition.struct'
407
- const isUnion = type === 'definition.union'
408
-
409
539
  classesMap.set(clsId, {
410
540
  id: clsId,
411
541
  name: clsName,
@@ -519,13 +649,71 @@ export class TreeSitterParser extends BaseParser {
519
649
  try {
520
650
  const nameForFile = name.replace(/-/g, '_')
521
651
 
522
- // Try multiple possible WASM locations
523
- const possiblePaths = [
524
- path.resolve('node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
525
- path.resolve('./node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
526
- path.resolve(process.cwd(), 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`),
527
- path.resolve(process.cwd(), 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`),
528
- ]
652
+ // Try multiple possible WASM locations, including parent directories and siblings for monorepos
653
+ const baseDirs = new Set<string>()
654
+ baseDirs.add(process.cwd())
655
+
656
+ // Add parent directories (up to 4 levels) for monorepo setups
657
+ let current = process.cwd()
658
+ let parentDir = ''
659
+ for (let i = 0; i < 4; i++) {
660
+ parentDir = path.dirname(current)
661
+ if (parentDir === current) break
662
+ baseDirs.add(parentDir)
663
+ baseDirs.add(path.join(parentDir, 'node_modules'))
664
+
665
+ // Also check sibling directories in the parent for monorepo setups
666
+ // Also check subdirs like Mesh/packages/core/node_modules
667
+ try {
668
+ const fs = await import('node:fs')
669
+ const entries = fs.readdirSync(parentDir, { withFileTypes: true })
670
+ for (const entry of entries) {
671
+ if (entry.isDirectory() && entry.name !== path.basename(current)) {
672
+ baseDirs.add(path.join(parentDir, entry.name, 'node_modules'))
673
+ // Also check for packages/*/node_modules patterns
674
+ const subPath = path.join(parentDir, entry.name, 'packages')
675
+ if (fs.existsSync(subPath)) {
676
+ try {
677
+ const pkgEntries = fs.readdirSync(subPath, { withFileTypes: true })
678
+ for (const pkg of pkgEntries) {
679
+ if (pkg.isDirectory()) {
680
+ baseDirs.add(path.join(subPath, pkg.name, 'node_modules'))
681
+ }
682
+ }
683
+ } catch { /* skip */ }
684
+ }
685
+ }
686
+ }
687
+ } catch { /* skip */ }
688
+
689
+ current = parentDir
690
+ }
691
+
692
+ const possiblePaths: string[] = []
693
+ for (const baseDir of baseDirs) {
694
+ if (!baseDir) continue
695
+ const p = path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`)
696
+ possiblePaths.push(p)
697
+
698
+ // Also check Mesh/packages/*/node_modules directly
699
+ const meshBase = path.join(baseDir, '..', 'Mesh', 'packages')
700
+ try {
701
+ const fs = await import('node:fs')
702
+ if (fs.existsSync(meshBase)) {
703
+ const entries = fs.readdirSync(meshBase, { withFileTypes: true })
704
+ for (const entry of entries) {
705
+ if (entry.isDirectory()) {
706
+ possiblePaths.push(path.join(meshBase, entry.name, 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`))
707
+ }
708
+ }
709
+ }
710
+ } catch { /* skip */ }
711
+ }
712
+
713
+ if (process.env.MIKK_DEBUG) {
714
+ console.log('[tree-sitter] Searching for:', name)
715
+ console.log('[tree-sitter] Paths checked:', possiblePaths.slice(0, 5).map(p => p.slice(-50)))
716
+ }
529
717
 
530
718
  let wasmPath = ''
531
719
  for (const p of possiblePaths) {
@@ -563,7 +751,6 @@ export class TreeSitterParser extends BaseParser {
563
751
 
564
752
  if (!wasmPath) {
565
753
  // WASM not found - but don't mark as permanent error, just skip this language
566
- console.warn(`Tree-sitter WASM not found for ${name}`)
567
754
  return null
568
755
  }
569
756
 
@@ -596,6 +783,14 @@ export class TreeSitterParser extends BaseParser {
596
783
  }
597
784
 
598
785
  private async getLanguageConfig(ext: string) {
786
+ // Ensure parser is initialized first
787
+ await this.init()
788
+
789
+ if (!this.parser) {
790
+ console.warn('[tree-sitter] Parser not initialized, returning null for', ext)
791
+ return null
792
+ }
793
+
599
794
  switch (ext) {
600
795
  case '.py':
601
796
  return { lang: await this.loadLang('python'), query: Queries.PYTHON_QUERIES }
@@ -604,8 +799,13 @@ export class TreeSitterParser extends BaseParser {
604
799
  case '.kt':
605
800
  case '.kts':
606
801
  return { lang: await this.loadLang('kotlin'), query: Queries.KOTLIN_QUERIES }
802
+ case '.scala':
803
+ case '.sc':
804
+ return { lang: await this.loadLang('scala'), query: Queries.SCALA_QUERIES }
607
805
  case '.swift':
608
806
  return { lang: await this.loadLang('swift'), query: Queries.SWIFT_QUERIES }
807
+ case '.dart':
808
+ return { lang: await this.loadLang('dart'), query: Queries.SWIFT_QUERIES }
609
809
  case '.c':
610
810
  case '.h':
611
811
  return { lang: await this.loadLang('c'), query: Queries.C_QUERIES }
@@ -622,35 +822,169 @@ export class TreeSitterParser extends BaseParser {
622
822
  return { lang: await this.loadLang('go'), query: Queries.GO_QUERIES }
623
823
  case '.rs':
624
824
  return { lang: await this.loadLang('rust'), query: Queries.RUST_QUERIES }
825
+ case '.zig':
826
+ return { lang: await this.loadLang('zig'), query: Queries.ZIG_QUERIES }
625
827
  case '.php':
626
828
  return { lang: await this.loadLang('php'), query: Queries.PHP_QUERIES }
627
829
  case '.rb':
628
830
  return { lang: await this.loadLang('ruby'), query: Queries.RUBY_QUERIES }
831
+ case '.hs':
832
+ return { lang: await this.loadLang('haskell'), query: Queries.HASKELL_QUERIES }
833
+ case '.ex':
834
+ case '.exs':
835
+ return { lang: await this.loadLang('elixir'), query: Queries.ELIXIR_QUERIES }
836
+ case '.clj':
837
+ case '.cljs':
838
+ case '.cljc':
839
+ return { lang: await this.loadLang('clojure'), query: Queries.CLOJURE_QUERIES }
840
+ case '.fs':
841
+ case '.fsx':
842
+ case '.fsi':
843
+ return { lang: await this.loadLang('fsharp'), query: Queries.FSHARP_QUERIES }
844
+ case '.ml':
845
+ case '.mli':
846
+ return { lang: await this.loadLang('ocaml'), query: Queries.OCAML_QUERIES }
847
+ case '.pl':
848
+ case '.pm':
849
+ return { lang: await this.loadLang('perl'), query: Queries.PERL_QUERIES }
850
+ case '.r':
851
+ case '.R':
852
+ return { lang: await this.loadLang('r'), query: Queries.R_QUERIES }
853
+ case '.jl':
854
+ return { lang: await this.loadLang('julia'), query: Queries.JULIA_QUERIES }
855
+ case '.lua':
856
+ return { lang: await this.loadLang('lua'), query: Queries.LUA_QUERIES }
857
+ case '.sql':
858
+ return { lang: await this.loadLang('sql'), query: Queries.SQL_QUERIES }
859
+ case '.tf':
860
+ return { lang: await this.loadLang('hcl'), query: Queries.HCL_QUERIES }
861
+ case '.sh':
862
+ case '.bash':
863
+ case '.zsh':
864
+ return { lang: await this.loadLang('bash'), query: Queries.BASH_QUERIES }
629
865
  default:
630
866
  return null
631
867
  }
632
868
  }
633
869
  }
634
870
 
871
+ // Register Tree-sitter for all supported languages (22+ languages)
872
+ const tsParser = new TreeSitterParser();
873
+ const registry = LanguageRegistry.getInstance();
874
+
875
+ const standardFeatures = {
876
+ hasTypeSystem: true,
877
+ hasGenerics: true,
878
+ hasMacros: false,
879
+ hasAnnotations: false,
880
+ hasPatternMatching: true,
881
+ };
882
+
883
+ const functionalFeatures = {
884
+ hasTypeSystem: true,
885
+ hasGenerics: true,
886
+ hasMacros: false,
887
+ hasAnnotations: false,
888
+ hasPatternMatching: false,
889
+ };
890
+
891
+ const scriptingFeatures = {
892
+ hasTypeSystem: false,
893
+ hasGenerics: false,
894
+ hasMacros: false,
895
+ hasAnnotations: false,
896
+ hasPatternMatching: false,
897
+ };
898
+
899
+ // All 22+ supported languages
900
+ const languages: Array<{ name: string; extensions: string[]; features: typeof standardFeatures }> = [
901
+ // JVM Languages
902
+ { name: 'java', extensions: ['.java'], features: standardFeatures },
903
+ { name: 'kotlin', extensions: ['.kt', '.kts'], features: standardFeatures },
904
+ { name: 'scala', extensions: ['.scala', '.sc'], features: standardFeatures },
905
+
906
+ // Apple Languages
907
+ { name: 'swift', extensions: ['.swift'], features: standardFeatures },
908
+ { name: 'dart', extensions: ['.dart'], features: standardFeatures },
909
+
910
+ // C Family
911
+ { name: 'c', extensions: ['.c', '.h'], features: standardFeatures },
912
+ { name: 'cpp', extensions: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh'], features: standardFeatures },
913
+ { name: 'csharp', extensions: ['.cs'], features: standardFeatures },
914
+
915
+ // Systems Languages
916
+ { name: 'rust', extensions: ['.rs'], features: standardFeatures },
917
+ { name: 'zig', extensions: ['.zig'], features: standardFeatures },
918
+
919
+ // Web Languages
920
+ { name: 'php', extensions: ['.php'], features: standardFeatures },
921
+ { name: 'ruby', extensions: ['.rb'], features: scriptingFeatures },
922
+
923
+ // Scripting Languages
924
+ { name: 'python', extensions: ['.py', '.pyw'], features: scriptingFeatures },
925
+ { name: 'lua', extensions: ['.lua'], features: scriptingFeatures },
926
+
927
+ // Functional Languages
928
+ { name: 'haskell', extensions: ['.hs'], features: functionalFeatures },
929
+ { name: 'elixir', extensions: ['.ex', '.exs'], features: functionalFeatures },
930
+ { name: 'clojure', extensions: ['.clj', '.cljs', '.cljc'], features: functionalFeatures },
931
+
932
+ // .NET Family
933
+ { name: 'fsharp', extensions: ['.fs', '.fsx', '.fsi'], features: standardFeatures },
934
+
935
+ // ML Family
936
+ { name: 'ocaml', extensions: ['.ml', '.mli'], features: functionalFeatures },
937
+
938
+ // Other Languages
939
+ { name: 'perl', extensions: ['.pl', '.pm'], features: scriptingFeatures },
940
+ { name: 'r', extensions: ['.r', '.R'], features: scriptingFeatures },
941
+ { name: 'julia', extensions: ['.jl'], features: scriptingFeatures },
942
+
943
+ // Config/Special Purpose
944
+ { name: 'sql', extensions: ['.sql'], features: scriptingFeatures },
945
+ { name: 'terraform', extensions: ['.tf'], features: scriptingFeatures },
946
+ { name: 'shell', extensions: ['.sh', '.bash', '.zsh'], features: scriptingFeatures },
947
+ ];
948
+
949
+ for (const lang of languages) {
950
+ registry.register({
951
+ ...lang,
952
+ treeSitterGrammar: `tree-sitter-${lang.name}`,
953
+ extractor: tsParser,
954
+ semanticFeatures: lang.features
955
+ });
956
+ }
957
+
635
958
  function extensionToLanguage(ext: string): ParsedFile['language'] {
636
- switch (ext) {
637
- case '.py': return 'python'
638
- case '.java': return 'java'
639
- case '.kt':
640
- case '.kts':
641
- return 'kotlin'
642
- case '.swift':
643
- return 'swift'
644
- case '.c': case '.h': return 'c'
645
- case '.cpp': case '.cc': case '.hpp': return 'cpp'
646
- case '.cxx': case '.hxx': case '.hh': return 'cpp'
647
- case '.cs': return 'csharp'
648
- case '.go': return 'go'
649
- case '.rs': return 'rust'
650
- case '.php': return 'php'
651
- case '.rb': return 'ruby'
652
- default: return 'unknown'
653
- }
959
+ const langMap: Record<string, ParsedFile['language']> = {
960
+ '.py': 'python', '.pyw': 'python',
961
+ '.java': 'java',
962
+ '.kt': 'kotlin', '.kts': 'kotlin',
963
+ '.scala': 'scala', '.sc': 'scala',
964
+ '.swift': 'swift',
965
+ '.dart': 'dart',
966
+ '.c': 'c', '.h': 'c',
967
+ '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp', '.hh': 'cpp',
968
+ '.cs': 'csharp',
969
+ '.rs': 'rust',
970
+ '.zig': 'rust',
971
+ '.php': 'php',
972
+ '.rb': 'ruby',
973
+ '.go': 'go',
974
+ '.hs': 'haskell',
975
+ '.ex': 'elixir', '.exs': 'elixir',
976
+ '.clj': 'clojure', '.cljs': 'clojure', '.cljc': 'clojure',
977
+ '.fs': 'csharp', '.fsx': 'csharp', '.fsi': 'csharp',
978
+ '.ml': 'ocaml', '.mli': 'ocaml',
979
+ '.pl': 'perl', '.pm': 'perl',
980
+ '.r': 'r', '.R': 'r',
981
+ '.jl': 'julia',
982
+ '.lua': 'lua',
983
+ '.sql': 'sql',
984
+ '.tf': 'terraform',
985
+ '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell',
986
+ };
987
+ return langMap[ext] ?? 'unknown';
654
988
  }
655
989
 
656
990
  function extractReturnType(ext: string, defNode: any, nodeText: string): string {